Imported Upstream version 1.20.0 submit/tizen/20220415.103159 upstream/1.20.0
author: Chunseok Lee <chunseok.lee@samsung.com>
Fri, 15 Apr 2022 10:15:11 +0000 (19:15 +0900)
committer: Chunseok Lee <chunseok.lee@samsung.com>
Fri, 15 Apr 2022 10:15:11 +0000 (19:15 +0900)
1359 files changed:
.ahub/tcchecker-tca/config.yaml
.gitattributes
compiler/angkor/CMakeLists.txt
compiler/arser/tests/arser.test.cpp
compiler/circle-eval-diff/CMakeLists.txt [new file with mode: 0644]
compiler/circle-eval-diff/README.md [new file with mode: 0644]
compiler/circle-eval-diff/driver/Driver.cpp [new file with mode: 0644]
compiler/circle-eval-diff/include/CircleEvalDiff.h [new file with mode: 0644]
compiler/circle-eval-diff/requires.cmake [new file with mode: 0644]
compiler/circle-eval-diff/src/CircleEvalDiff.cpp [new file with mode: 0644]
compiler/circle-eval-diff/src/MetricPrinter.cpp [new file with mode: 0644]
compiler/circle-eval-diff/src/MetricPrinter.h [new file with mode: 0644]
compiler/circle-eval-diff/src/MetricPrinter.test.cpp [new file with mode: 0644]
compiler/circle-eval-diff/src/ModuleEvalDiff.cpp [new file with mode: 0644]
compiler/circle-eval-diff/src/ModuleEvalDiff.h [new file with mode: 0644]
compiler/circle-eval-diff/src/Tensor.cpp [new file with mode: 0644]
compiler/circle-eval-diff/src/Tensor.h [new file with mode: 0644]
compiler/circle-eval-diff/src/Tensor.test.cpp [new file with mode: 0644]
compiler/circle-execution-plan/CMakeLists.txt
compiler/circle-execution-plan/README.md
compiler/circle-execution-plan/pal/IScratchpadHelper.h [new file with mode: 0644]
compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h [new file with mode: 0644]
compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h [new file with mode: 0644]
compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h [new file with mode: 0644]
compiler/circle-execution-plan/pal/TargetPlatform.h [new file with mode: 0644]
compiler/circle-execution-plan/src/CircleExecutionPlan.cpp
compiler/circle-execution-plan/src/ExecutionPlanner.cpp
compiler/circle-execution-plan/src/ExecutionPlanner.h
compiler/circle-inspect/CMakeLists.txt
compiler/circle-inspect/README.md
compiler/circle-inspect/driver/Driver.cpp
compiler/circle-inspect/requires.cmake
compiler/circle-inspect/src/Dump.cpp
compiler/circle-inspect/src/Dump.h
compiler/circle-inspect/src/Reader.cpp
compiler/circle-inspect/src/Reader.h
compiler/circle-opselector/README.md
compiler/circle-part-value-test/CMakeLists.txt
compiler/circle-part-value-test/part_eval_one.py
compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Split_Add_000.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part [new file with mode: 0644]
compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part [new file with mode: 0644]
compiler/circle-part-value-test/test.lst
compiler/circle-partitioner-test/CMakeLists.txt
compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part [new file with mode: 0644]
compiler/circle-partitioner-test/test.lst
compiler/circle-partitioner/CMakeLists.txt
compiler/circle-partitioner/README.md
compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt [new file with mode: 0644]
compiler/circle-quantizer-dredd-recipe-test/README.md [new file with mode: 0644]
compiler/circle-quantizer-dredd-recipe-test/requires.cmake [new file with mode: 0644]
compiler/circle-quantizer-dredd-recipe-test/test.lst [new file with mode: 0644]
compiler/circle-quantizer-dredd-recipe-test/testall.sh [new file with mode: 0755]
compiler/circle-quantizer/CMakeLists.txt
compiler/circle-quantizer/src/CircleQuantizer.cpp
compiler/circle-tensordump/CMakeLists.txt
compiler/circle-tensordump/requires.cmake
compiler/circle-tensordump/src/Reader.cpp
compiler/circle-tensordump/src/Reader.h
compiler/circle-verify/CMakeLists.txt
compiler/circle-verify/requires.cmake
compiler/circle2circle-dredd-recipe-test/CMakeLists.txt
compiler/circle2circle/CMakeLists.txt
compiler/circle2circle/requires.cmake
compiler/circle2circle/src/Circle2Circle.cpp
compiler/circlechef/CMakeLists.txt
compiler/circlechef/circle/CMakeLists.txt
compiler/circlechef/circle/src/CircleImport.cpp
compiler/circlechef/circle/src/CircleImport.h
compiler/circlechef/circle/src/RecipeChef.cpp
compiler/circlechef/core/CMakeLists.txt
compiler/circlechef/core/src/ModelChef.cpp
compiler/circlechef/requires.cmake
compiler/circlechef/tests/CMakeLists.txt
compiler/circledump/CMakeLists.txt
compiler/circledump/README.md
compiler/circledump/requires.cmake
compiler/circledump/src/Dump.cpp
compiler/circledump/src/Load.cpp
compiler/circledump/src/OpPrinter.cpp
compiler/circledump/src/Read.cpp
compiler/circledump/src/Read.h
compiler/cli/CMakeLists.txt
compiler/common-artifacts/CMakeLists.txt
compiler/common-artifacts/exclude.lst
compiler/common-artifacts/options.lst [new file with mode: 0644]
compiler/common-artifacts/requires.cmake
compiler/common-artifacts/src/TestDataGenerator.cpp
compiler/dio-hdf5/CMakeLists.txt [new file with mode: 0644]
compiler/dio-hdf5/README.md [new file with mode: 0644]
compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h [new file with mode: 0644]
compiler/dio-hdf5/requires.cmake [new file with mode: 0644]
compiler/dio-hdf5/src/HDF5Importer.cpp [new file with mode: 0644]
compiler/dio-hdf5/src/HDF5Importer.test.cpp [new file with mode: 0644]
compiler/dredd-rule-lib/rule-lib.sh
compiler/embedded-import-value-test/.gitignore [new file with mode: 0644]
compiler/embedded-import-value-test/CMakeLists.txt [new file with mode: 0644]
compiler/embedded-import-value-test/README.md [new file with mode: 0644]
compiler/embedded-import-value-test/evalverify.sh [new file with mode: 0755]
compiler/embedded-import-value-test/requires.cmake [new file with mode: 0644]
compiler/embedded-import-value-test/src/TestDriver.cpp [new file with mode: 0644]
compiler/embedded-import-value-test/test.lst [new file with mode: 0644]
compiler/enco/CMakeLists.txt
compiler/enco/core/CMakeLists.txt
compiler/enco/frontend/caffe/CMakeLists.txt
compiler/enco/frontend/tflite/CMakeLists.txt
compiler/exo/CMakeLists.txt
compiler/hermes-std/CMakeLists.txt
compiler/hermes-std/include/hermes/ConsoleReporter.h
compiler/hermes-std/src/ConsoleReporter.cpp
compiler/hermes-std/src/ConsoleReporter.test.cpp
compiler/hermes/CMakeLists.txt
compiler/hermes/include/hermes/core/Message.h
compiler/hermes/include/hermes/core/MessageBuffer.h
compiler/hermes/src/core/MessageBuffer.cpp
compiler/hermes/src/core/Source.cpp
compiler/locomotiv/CMakeLists.txt
compiler/locop/CMakeLists.txt
compiler/logo-core/CMakeLists.txt
compiler/logo-ex/CMakeLists.txt [new file with mode: 0644]
compiler/logo-ex/README.md [new file with mode: 0644]
compiler/logo-ex/include/logo/ConstantFoldingPass.h [new file with mode: 0644]
compiler/logo-ex/include/logo/PassesEx.h [new file with mode: 0644]
compiler/logo-ex/requires.cmake [new file with mode: 0644]
compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp [new file with mode: 0644]
compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp [new file with mode: 0644]
compiler/logo-ex/src/TestHelper.h [new file with mode: 0644]
compiler/logo/CMakeLists.txt
compiler/logo/include/logo/ConstantFoldingPass.h [deleted file]
compiler/logo/include/logo/Passes.h
compiler/logo/requires.cmake
compiler/logo/src/Passes/ConstantFoldingPass.cpp [deleted file]
compiler/logo/src/Passes/ConstantFoldingPass.test.cpp [deleted file]
compiler/luci-interpreter/README.md
compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h [new file with mode: 0644]
compiler/luci-interpreter/pal/cmsisnn/PALMul.h
compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h [new file with mode: 0644]
compiler/luci-interpreter/pal/cmsisnn/pal.cmake
compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
compiler/luci-interpreter/pal/linux/PALAveragePool2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALBatchMatMul.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALConv2d.h
compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALDequantize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALFullyConnected.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALGather.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALMul.h
compiler/luci-interpreter/pal/linux/PALQuantize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSVDF.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/pal.cmake
compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALConv2d.h
compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALDequantize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALFullyConnected.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALMul.h
compiler/luci-interpreter/pal/mcu/PALQuantize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALSVDF.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/pal.cmake
compiler/luci-interpreter/src/CMakeLists.txt
compiler/luci-interpreter/src/Interpreter.cpp
compiler/luci-interpreter/src/core/CMakeLists.txt
compiler/luci-interpreter/src/core/KernelParams.h
compiler/luci-interpreter/src/import/CMakeLists.txt [new file with mode: 0644]
compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Add.cpp
compiler/luci-interpreter/src/kernels/Add.h
compiler/luci-interpreter/src/kernels/Add.test.cpp
compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
compiler/luci-interpreter/src/kernels/AveragePool2D.h
compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
compiler/luci-interpreter/src/kernels/BatchMatMul.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/BatchMatMul.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
compiler/luci-interpreter/src/kernels/CMakeLists.txt
compiler/luci-interpreter/src/kernels/Cast.test.cpp
compiler/luci-interpreter/src/kernels/Concatenation.cpp
compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
compiler/luci-interpreter/src/kernels/Conv2D.cpp
compiler/luci-interpreter/src/kernels/Conv2D.h
compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
compiler/luci-interpreter/src/kernels/Dequantize.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Dequantize.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Dequantize.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Div.cpp
compiler/luci-interpreter/src/kernels/Div.h
compiler/luci-interpreter/src/kernels/Div.test.cpp
compiler/luci-interpreter/src/kernels/Equal.cpp
compiler/luci-interpreter/src/kernels/Equal.h
compiler/luci-interpreter/src/kernels/Equal.test.cpp
compiler/luci-interpreter/src/kernels/ExpandDims.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/ExpandDims.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/FullyConnected.cpp
compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
compiler/luci-interpreter/src/kernels/Gather.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Gather.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Gather.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Greater.cpp
compiler/luci-interpreter/src/kernels/Greater.h
compiler/luci-interpreter/src/kernels/Greater.test.cpp
compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
compiler/luci-interpreter/src/kernels/GreaterEqual.h
compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
compiler/luci-interpreter/src/kernels/Less.cpp
compiler/luci-interpreter/src/kernels/Less.h
compiler/luci-interpreter/src/kernels/Less.test.cpp
compiler/luci-interpreter/src/kernels/LessEqual.cpp
compiler/luci-interpreter/src/kernels/LessEqual.h
compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
compiler/luci-interpreter/src/kernels/Logistic.test.cpp
compiler/luci-interpreter/src/kernels/MirrorPad.cpp
compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
compiler/luci-interpreter/src/kernels/Mul.cpp
compiler/luci-interpreter/src/kernels/Mul.h
compiler/luci-interpreter/src/kernels/Mul.test.cpp
compiler/luci-interpreter/src/kernels/NotEqual.cpp
compiler/luci-interpreter/src/kernels/NotEqual.h
compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
compiler/luci-interpreter/src/kernels/OneHot.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/OneHot.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/OneHot.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Pack.test.cpp
compiler/luci-interpreter/src/kernels/Pad.cpp
compiler/luci-interpreter/src/kernels/Pad.test.cpp
compiler/luci-interpreter/src/kernels/Quantize.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Quantize.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Quantize.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
compiler/luci-interpreter/src/kernels/SVDF.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/SVDF.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/SVDF.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Slice.cpp
compiler/luci-interpreter/src/kernels/Slice.test.cpp
compiler/luci-interpreter/src/kernels/Softmax.test.cpp
compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
compiler/luci-interpreter/src/kernels/Split.test.cpp
compiler/luci-interpreter/src/kernels/SplitV.test.cpp
compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
compiler/luci-interpreter/src/kernels/Sub.cpp
compiler/luci-interpreter/src/kernels/Sub.h
compiler/luci-interpreter/src/kernels/Sub.test.cpp
compiler/luci-interpreter/src/kernels/Transpose.test.cpp
compiler/luci-interpreter/src/kernels/Unpack.test.cpp
compiler/luci-interpreter/src/kernels/Utils.cpp
compiler/luci-interpreter/src/kernels/Utils.h
compiler/luci-interpreter/src/loader/CMakeLists.txt
compiler/luci-interpreter/src/loader/GraphLoader.cpp
compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
compiler/luci-interpreter/src/loader/nodes/Gather.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/OneHot.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Quantize.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/SVDF.cpp [new file with mode: 0644]
compiler/luci-micro/CMakeLists.txt
compiler/luci-pass-value-test/CMakeLists.txt
compiler/luci-pass-value-test/eval_result_verifier.py
compiler/luci-pass-value-test/test.lst
compiler/luci-value-test/CMakeLists.txt
compiler/luci-value-test/evalverify.sh
compiler/luci-value-test/evalverify_ref.sh [new file with mode: 0755]
compiler/luci-value-test/evalverifytol.sh [new file with mode: 0755]
compiler/luci-value-test/evalverifytol_ref.sh [new file with mode: 0755]
compiler/luci-value-test/luci_eval_verifier.py
compiler/luci-value-test/luci_eval_verifier_ref.py [new file with mode: 0755]
compiler/luci-value-test/test.lst
compiler/luci/CMakeLists.txt
compiler/luci/export/CMakeLists.txt
compiler/luci/export/src/CircleBuiltinTypesExtractor.h [new file with mode: 0644]
compiler/luci/export/src/CircleBuiltinTypesMappingRule.h [new file with mode: 0644]
compiler/luci/export/src/CircleExporterImpl.cpp
compiler/luci/export/src/CircleExporterUtils.cpp
compiler/luci/export/src/CircleExporterUtils.h
compiler/luci/export/src/CircleOperationExporter.cpp
compiler/luci/export/src/CircleOperationExporter.h
compiler/luci/export/src/CircleOperationExporterRule.cpp [new file with mode: 0644]
compiler/luci/export/src/CircleOperationExporterRule.h [new file with mode: 0644]
compiler/luci/export/src/CircleOps.lst [new file with mode: 0644]
compiler/luci/export/src/CircleTensorExporter.cpp
compiler/luci/export/src/SerializedData.h
compiler/luci/import/CMakeLists.txt
compiler/luci/import/include/luci/Import/CircleReader.h
compiler/luci/import/include/luci/Import/GraphBuilderRegistry.h
compiler/luci/import/include/luci/Import/NodeBuilder.h [new file with mode: 0644]
compiler/luci/import/include/luci/Import/Nodes.h
compiler/luci/import/include/luci/Import/Nodes/CircleConst.h
compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h [new file with mode: 0644]
compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h [new file with mode: 0644]
compiler/luci/import/src/CircleImportMetadata.cpp
compiler/luci/import/src/CircleReader.cpp
compiler/luci/import/src/GraphBuilder.cpp
compiler/luci/import/src/GraphBuilderMultiOutput.cpp
compiler/luci/import/src/GraphBuilderRegistry.cpp
compiler/luci/import/src/Importer.cpp
compiler/luci/import/src/Importer.test.cpp
compiler/luci/import/src/Nodes/CircleCast.cpp
compiler/luci/import/src/Nodes/CircleConst.cpp
compiler/luci/import/src/Nodes/CircleCustom.cpp
compiler/luci/import/src/Nodes/CircleDepthToSpace.cpp
compiler/luci/import/src/Nodes/CircleDepthwiseConv2D.cpp
compiler/luci/import/src/Nodes/CircleElu.cpp
compiler/luci/import/src/Nodes/CircleEqual.cpp
compiler/luci/import/src/Nodes/CircleExp.cpp
compiler/luci/import/src/Nodes/CircleExpandDims.cpp
compiler/luci/import/src/Nodes/CircleFloorDiv.cpp
compiler/luci/import/src/Nodes/CircleFloorMod.cpp
compiler/luci/import/src/Nodes/CircleFullyConnected.cpp
compiler/luci/import/src/Nodes/CircleGatherNd.cpp
compiler/luci/import/src/Nodes/CircleGreater.cpp
compiler/luci/import/src/Nodes/CircleGreaterEqual.cpp
compiler/luci/import/src/Nodes/CircleIf.cpp
compiler/luci/import/src/Nodes/CircleLess.cpp
compiler/luci/import/src/Nodes/CircleLessEqual.cpp
compiler/luci/import/src/Nodes/CircleLog.cpp
compiler/luci/import/src/Nodes/CircleLogicalAnd.cpp
compiler/luci/import/src/Nodes/CircleLogicalNot.cpp
compiler/luci/import/src/Nodes/CircleLogicalOr.cpp
compiler/luci/import/src/Nodes/CircleLogistic.cpp
compiler/luci/import/src/Nodes/CircleMatrixDiag.cpp
compiler/luci/import/src/Nodes/CircleMatrixSetDiag.cpp
compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV4.cpp
compiler/luci/import/src/Nodes/CircleNonMaxSuppressionV5.cpp
compiler/luci/import/src/Nodes/CircleNotEqual.cpp
compiler/luci/import/src/Nodes/CircleOneHot.cpp
compiler/luci/import/src/Nodes/CircleReduceAny.cpp
compiler/luci/import/src/Nodes/CircleReduceProd.cpp
compiler/luci/import/src/Nodes/CircleReshape.cpp
compiler/luci/import/src/Nodes/CircleReverseSequence.cpp
compiler/luci/import/src/Nodes/CircleReverseV2.cpp
compiler/luci/import/src/Nodes/CircleRound.cpp
compiler/luci/import/src/Nodes/CircleRsqrt.cpp
compiler/luci/import/src/Nodes/CircleSVDF.cpp [new file with mode: 0644]
compiler/luci/import/src/Nodes/CircleScatterNd.cpp
compiler/luci/import/src/Nodes/CircleSegmentSum.cpp
compiler/luci/import/src/Nodes/CircleSelect.cpp
compiler/luci/import/src/Nodes/CircleSelectV2.cpp
compiler/luci/import/src/Nodes/CircleSin.cpp
compiler/luci/import/src/Nodes/CircleSquare.cpp
compiler/luci/import/src/Nodes/CircleSquaredDifference.cpp
compiler/luci/import/src/Nodes/CircleTanh.cpp
compiler/luci/import/src/Nodes/CircleTile.cpp
compiler/luci/import/src/Nodes/CircleTopKV2.cpp
compiler/luci/import/src/Nodes/CircleTransposeConv.cpp
compiler/luci/import/src/Nodes/CircleUnpack.cpp
compiler/luci/import/src/Nodes/CircleVariable.cpp [new file with mode: 0644]
compiler/luci/import/src/Nodes/CircleWhere.cpp
compiler/luci/import/src/Nodes/CircleWhile.cpp
compiler/luci/import/src/ValidateHelpers.cpp
compiler/luci/lang/include/luci/IR/CircleNodes.h
compiler/luci/lang/include/luci/IR/CircleNodes.lst
compiler/luci/lang/include/luci/IR/CircleQuantParam.h
compiler/luci/lang/include/luci/IR/Nodes/CircleFullyConnected.h
compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h [new file with mode: 0644]
compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h [new file with mode: 0644]
compiler/luci/lang/src/CircleQuantParam.cpp [new file with mode: 0644]
compiler/luci/lang/src/CircleQuantParam.test.cpp [new file with mode: 0644]
compiler/luci/lang/src/Nodes/CircleFullyConnected.test.cpp
compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp [new file with mode: 0644]
compiler/luci/lang/src/Nodes/CircleVariable.test.cpp [new file with mode: 0644]
compiler/luci/logex/CMakeLists.txt
compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp [new file with mode: 0644]
compiler/luci/logex/src/CircleNodeSummaryBuilder.h [new file with mode: 0644]
compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp [new file with mode: 0644]
compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp [new file with mode: 0644]
compiler/luci/logex/src/CircleNodeSummaryBuilders.h [new file with mode: 0644]
compiler/luci/logex/src/FormattedGraph.cpp
compiler/luci/partition/CMakeLists.txt
compiler/luci/partition/src/ConnectNode.h
compiler/luci/partition/src/Nodes/CircleSVDF.cpp [new file with mode: 0644]
compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp [new file with mode: 0644]
compiler/luci/partition/src/Nodes/CircleVariable.cpp [new file with mode: 0644]
compiler/luci/partition/src/PartitionIRDump.cpp
compiler/luci/partition/src/PartitionMerge.cpp
compiler/luci/partition/src/PartitionPGroups.cpp
compiler/luci/pass/CMakeLists.txt
compiler/luci/pass/include/luci/CircleOptimizer.h
compiler/luci/pass/include/luci/CircleQuantizer.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/FoldGatherPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h [deleted file]
compiler/luci/pass/include/luci/Pass/QuantizationParameters.h
compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h [new file with mode: 0644]
compiler/luci/pass/src/BatchNormPatternFinder.cpp
compiler/luci/pass/src/BatchNormPatternFinder.test.cpp
compiler/luci/pass/src/CircleOptimizer.cpp
compiler/luci/pass/src/CircleOptimizer.test.cpp
compiler/luci/pass/src/CircleQuantizer.cpp [new file with mode: 0644]
compiler/luci/pass/src/CircleQuantizer.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/CopyQuantParamPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/FoldGatherPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/FoldGatherPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/PropagateConcatenationQparam.test.cpp
compiler/luci/pass/src/PropagateQParamBackwardPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/PropagateQParamForwardPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/PropagateQuantParamPass.cpp [deleted file]
compiler/luci/pass/src/PropagateQuantParamPass.test.cpp [deleted file]
compiler/luci/pass/src/QuantizationUtils.cpp
compiler/luci/pass/src/QuantizationUtils.h
compiler/luci/pass/src/QuantizeActivation.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizeActivation.h [new file with mode: 0644]
compiler/luci/pass/src/QuantizeBias.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizeBias.h [new file with mode: 0644]
compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
compiler/luci/pass/src/QuantizeDequantizeWeightsPass.test.cpp
compiler/luci/pass/src/QuantizePreCheckerPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizeWeights.cpp [new file with mode: 0644]
compiler/luci/pass/src/QuantizeWeights.h [new file with mode: 0644]
compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
compiler/luci/pass/src/QuantizeWithMinMaxPass.test.cpp
compiler/luci/pass/src/QuantizedModelVerifier.cpp
compiler/luci/pass/src/QuantizedModelVerifier.h
compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp [new file with mode: 0644]
compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/RemoveRedundantTransposePass.cpp
compiler/luci/pass/src/RemoveRedundantTransposePass.test.cpp
compiler/luci/pass/src/RemoveUnnecessaryReshapePass.cpp
compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.cpp
compiler/luci/pass/src/ReplaceMulAddWithDepthwiseConvPass.test.cpp
compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
compiler/luci/pass/src/SubstituteStridedSliceToReshapePass.cpp
compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedBiasScale.h [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h [deleted file]
compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h [deleted file]
compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h [deleted file]
compiler/luci/pass/src/VerifyQuantizedNodeType.cpp [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedNodeType.h [new file with mode: 0644]
compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h [deleted file]
compiler/luci/pass/src/helpers/LayerInfoMap.cpp [new file with mode: 0644]
compiler/luci/pass/src/helpers/LayerInfoMap.h [new file with mode: 0644]
compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp [new file with mode: 0644]
compiler/luci/requires.cmake
compiler/luci/service/CMakeLists.txt
compiler/luci/service/include/luci/Service/CircleShapeInference.h
compiler/luci/service/include/luci/Service/CircleTypeInference.h
compiler/luci/service/src/CircleCloneNode.h
compiler/luci/service/src/CircleNodeClone.cpp
compiler/luci/service/src/CircleShapeInferenceRule.cpp
compiler/luci/service/src/CircleTypeInferenceRule.cpp
compiler/luci/service/src/Nodes/CircleSVDF.cpp [new file with mode: 0644]
compiler/luci/service/src/Nodes/CircleSVDF.test.cpp [new file with mode: 0644]
compiler/luci/service/src/Nodes/CircleVariable.cpp [new file with mode: 0644]
compiler/luci/service/src/Nodes/CircleVariable.test.cpp [new file with mode: 0644]
compiler/luci/tests/CMakeLists.txt
compiler/luci/tests/test.lst
compiler/mio-circle/CMakeLists.txt
compiler/mio-circle/include/mio_circle/Helper.h [new file with mode: 0644]
compiler/mio-circle/src/Helper.cpp [new file with mode: 0644]
compiler/mio-circle04/CMakeLists.txt [new file with mode: 0644]
compiler/mio-circle04/README.md [new file with mode: 0644]
compiler/mio-circle04/example.cpp [new file with mode: 0644]
compiler/mio-circle04/include/mio_circle/Helper.h [new file with mode: 0644]
compiler/mio-circle04/src/Helper.cpp [new file with mode: 0644]
compiler/mio-circle04/src/Helper.test.cpp [new file with mode: 0644]
compiler/mio-tflite/CMakeLists.txt
compiler/mio-tflite260/CMakeLists.txt
compiler/mio-tflite260/include/mio_tflite260/Helper.h [new file with mode: 0644]
compiler/mio-tflite260/src/Helper.cpp [new file with mode: 0644]
compiler/mio-tflite260/src/Helper.test.cpp [new file with mode: 0644]
compiler/mio-tflite280/CMakeLists.txt [new file with mode: 0644]
compiler/mio-tflite280/README.md [new file with mode: 0644]
compiler/mio-tflite280/example.cpp [new file with mode: 0644]
compiler/mio-tflite280/include/mio_tflite280/Helper.h [new file with mode: 0644]
compiler/mio-tflite280/src/Helper.cpp [new file with mode: 0644]
compiler/mio-tflite280/src/Helper.test.cpp [new file with mode: 0644]
compiler/mir/src/mir_onnx_importer/CMakeLists.txt
compiler/mir/src/mir_tflite_importer/CMakeLists.txt
compiler/mir2loco/CMakeLists.txt
compiler/moco-tf/CMakeLists.txt
compiler/moco-tf/requires.cmake
compiler/moco-tf/src/Transforms.h
compiler/morph/CMakeLists.txt
compiler/nest/core/CMakeLists.txt
compiler/nike/CMakeLists.txt
compiler/nnc/unittests/soft_backend/ModelAnalyzer.cpp
compiler/nnop/CMakeLists.txt
compiler/one-cmds/CMakeLists.txt
compiler/one-cmds/how-to-prepare-virtualenv.txt
compiler/one-cmds/how-to-use-one-commands.txt
compiler/one-cmds/one-build
compiler/one-cmds/one-import-bcq
compiler/one-cmds/one-import-onnx
compiler/one-cmds/one-import-pytorch [new file with mode: 0644]
compiler/one-cmds/one-import-tf
compiler/one-cmds/one-import-tflite
compiler/one-cmds/one-optimize
compiler/one-cmds/one-prepare-venv
compiler/one-cmds/one-quantize
compiler/one-cmds/onecc
compiler/one-cmds/onelib/constant.py [new file with mode: 0644]
compiler/one-cmds/onelib/make_cmd.py [new file with mode: 0644]
compiler/one-cmds/onnx_legalizer.py [new file with mode: 0755]
compiler/one-cmds/tests/CMakeLists.txt
compiler/one-cmds/tests/one-quantize_009.qconf.json [new file with mode: 0644]
compiler/one-cmds/tests/one-quantize_009.test [new file with mode: 0644]
compiler/one-cmds/tests/onnx-operations/CMakeLists.txt [new file with mode: 0644]
compiler/one-cmds/tests/onnx-operations/README.md [new file with mode: 0644]
compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh [new file with mode: 0644]
compiler/one-cmds/tests/onnx_legalize_run_compare.py [new file with mode: 0644]
compiler/one-cmds/tests/prepare_test_materials.sh
compiler/one-cmds/tests/print_onnx_model.py [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/README.md [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/aux_generator.py [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/entire_model.test [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/example_generator.py [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/state_dict_model.test [new file with mode: 0644]
compiler/one-cmds/tests/pytorch-operations/torchscript_model.test [new file with mode: 0644]
compiler/one-cmds/utils.py
compiler/oneco/CMakeLists.txt
compiler/pepper-strcast/CMakeLists.txt
compiler/pota-quantization-value-test/CMakeLists.txt
compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json [new file with mode: 0644]
compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json [new file with mode: 0644]
compiler/pota-quantization-value-test/requires.cmake
compiler/pota-quantization-value-test/test.lst
compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh [new file with mode: 0755]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt [new file with mode: 0644]
compiler/pota-quantization-value-test/test_quantization_with_config.sh [new file with mode: 0755]
compiler/pp/CMakeLists.txt
compiler/record-minmax-conversion-test/CMakeLists.txt
compiler/record-minmax/CMakeLists.txt
compiler/record-minmax/requires.cmake
compiler/record-minmax/src/HDF5Importer.cpp [deleted file]
compiler/record-minmax/src/HDF5Importer.h [deleted file]
compiler/record-minmax/src/MinMaxObserver.cpp
compiler/record-minmax/src/RecordMinMax.cpp
compiler/souschef/CMakeLists.txt
compiler/tf2tfliteV2-conversion-test/CMakeLists.txt
compiler/tfl-inspect/CMakeLists.txt
compiler/tfl-inspect/requires.cmake
compiler/tfl-inspect/src/Reader.cpp
compiler/tfl-inspect/src/Reader.h
compiler/tfl-verify/CMakeLists.txt
compiler/tfl-verify/requires.cmake
compiler/tflchef/CMakeLists.txt
compiler/tflchef/core/CMakeLists.txt
compiler/tflchef/core/src/ModelChef.cpp
compiler/tflchef/core/src/Op/FullyConnected.cpp
compiler/tflchef/core/src/Op/SVDF.cpp [new file with mode: 0644]
compiler/tflchef/core/src/Op/SVDF.h [new file with mode: 0644]
compiler/tflchef/core/src/OpChef.def
compiler/tflchef/core/src/OpChefs.h
compiler/tflchef/proto/tflchef.proto
compiler/tflchef/requires.cmake
compiler/tflchef/tests/CMakeLists.txt
compiler/tflchef/tests/signature_def_index/test.recipe
compiler/tflchef/tests/signature_def_name/test.recipe
compiler/tflchef/tflite/CMakeLists.txt
compiler/tflchef/tflite/src/Op/FullyConnected.cpp
compiler/tflchef/tflite/src/Op/SVDF.cpp [new file with mode: 0644]
compiler/tflchef/tflite/src/Op/SVDF.h [new file with mode: 0644]
compiler/tflchef/tflite/src/RecipeChef.cpp
compiler/tflchef/tflite/src/TFliteImport.cpp
compiler/tflchef/tflite/src/TFliteImport.h
compiler/tflchef/tflite/src/TFliteOpChefs.h
compiler/tflchef/tflite/src/TFliteOpRegistry.h
compiler/tfldump/CMakeLists.txt
compiler/tfldump/requires.cmake
compiler/tfldump/src/Dump.cpp
compiler/tfldump/src/Load.cpp
compiler/tfldump/src/OpPrinter.cpp
compiler/tfldump/src/Read.cpp
compiler/tfldump/src/Read.h
compiler/tflite2circle/CMakeLists.txt
compiler/tflite2circle/requires.cmake
compiler/tflite2circle/src/BuildBuiltinOptions.h
compiler/tflite2circle/src/BuildBuiltinOptions/FullyConnectedOptions.cpp
compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp [new file with mode: 0644]
compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h [new file with mode: 0644]
compiler/tflite2circle/src/CircleModel.cpp
compiler/tflite2circle/src/DataLookup.cpp
compiler/tflite2circle/src/DataLookup.h
compiler/tflite2circle/src/TFLBuiltinOptions.lst
compiler/vconone/CMakeLists.txt
docs/conf.py
docs/howto/how-to-build-compiler.md
docs/howto/how-to-build-runtime.md
infra/cmake/modules/ExternalBuildTools.cmake
infra/cmake/modules/ExternalSourceTools.cmake
infra/cmake/modules/IdentifyPlatform.cmake
infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake [deleted file]
infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake [deleted file]
infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake [deleted file]
infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake [deleted file]
infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffersConfig.cmake [deleted file]
infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake [deleted file]
infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake [deleted file]
infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake [deleted file]
infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfigVersion.cmake [deleted file]
infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffersSourceConfig.cmake [deleted file]
infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake [deleted file]
infra/cmake/packages/GTestConfig.cmake
infra/cmake/packages/GTestSourceConfig.cmake
infra/cmake/packages/H5Tinit.c.linux-armv7l [new file with mode: 0644]
infra/cmake/packages/HDF5Config.cmake
infra/cmake/packages/HDF5Source.patch [new file with mode: 0644]
infra/cmake/packages/HDF5SourceConfig.cmake
infra/cmake/packages/JsoncppConfig.cmake [new file with mode: 0644]
infra/cmake/packages/JsoncppSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/Opencl_HeadersConfig.cmake
infra/cmake/packages/ProtobufConfig.cmake
infra/cmake/packages/ProtobufSource.patch [new file with mode: 0644]
infra/cmake/packages/ProtobufSourceConfig.cmake
infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowGpuConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch [new file with mode: 0644]
infra/cmake/packages/TensorFlowGpuSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowLite-1.13.1/Lite/CMakeLists.txt
infra/cmake/packages/TensorFlowLite-1.13.1/TensorFlowLiteConfig.cmake
infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt [new file with mode: 0644]
infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake [new file with mode: 0644]
infra/command/format
infra/debian/compiler/control
infra/debian/compiler/one-compiler.install
infra/debian/compiler/postinst
infra/debian/compiler/rules
infra/docker/bionic/Dockerfile
infra/docker/focal/Dockerfile
infra/nncc/CMakeLists.txt
infra/nncc/Makefile.arm32 [new file with mode: 0644]
infra/nncc/cmake/ApplyCompileFlags.cmake [new file with mode: 0644]
infra/nncc/cmake/CfgOptionFlags.cmake [new file with mode: 0644]
infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake [new file with mode: 0644]
infra/nncc/cmake/buildtool/config/config_linux.cmake [new file with mode: 0644]
infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake [new file with mode: 0644]
infra/nncc/cmake/options/options_armv7l-linux.cmake [new file with mode: 0644]
infra/nncc/cmake/options/options_x86_64-linux.cmake [new file with mode: 0644]
infra/nnfw/cmake/CfgOptionFlags.cmake
infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake [new file with mode: 0644]
infra/nnfw/cmake/options/options_aarch64-tizen.cmake
infra/nnfw/cmake/options/options_armv7hl-tizen.cmake [new file with mode: 0644]
infra/nnfw/cmake/options/options_armv7l-linux.cmake
infra/nnfw/cmake/options/options_armv7l-tizen.cmake
infra/nnfw/cmake/options/options_i686-tizen.cmake
infra/nnfw/cmake/options/options_x86_64-darwin.cmake
infra/nnfw/cmake/options/options_x86_64-linux.cmake
infra/nnfw/cmake/options/options_x86_64-tizen.cmake
infra/nnfw/cmake/packages/CpuInfoConfig.cmake
infra/nnfw/cmake/packages/FlatBuffersConfig.cmake
infra/nnfw/cmake/packages/GTestConfig.cmake
infra/nnfw/cmake/packages/TRIXEngineConfig.cmake [new file with mode: 0644]
infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake [new file with mode: 0644]
infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp [new file with mode: 0644]
infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp [new file with mode: 0644]
infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp [new file with mode: 0644]
infra/nnfw/config/gbs.conf
infra/packaging/build
infra/packaging/preset/20220323 [new file with mode: 0644]
infra/packaging/preset/20220323_windows [new file with mode: 0644]
infra/packaging/res/tf2nnpkg.20220323 [new file with mode: 0644]
infra/scripts/compiler_modules.sh
infra/scripts/docker_build_test_x64.sh
infra/scripts/docker_collect_nnpkg_resources.sh
nnpackage/spec/10_packaging_and_manifest.md
packaging/CPUINFO.tar.gz [new file with mode: 0644]
packaging/GEMMLOWP.tar.gz [new file with mode: 0644]
packaging/OOURAFFT.tar.gz [new file with mode: 0644]
packaging/RUY.tar.gz [new file with mode: 0644]
packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz [new file with mode: 0644]
packaging/TENSORFLOW_GPU.tar.gz [new file with mode: 0644]
packaging/cpuinfo.tar.gz [deleted file]
packaging/eigen.tar.gz [deleted file]
packaging/gemmlowp.tar.gz [deleted file]
packaging/gtest.tar.gz [deleted file]
packaging/nnapi_test_generated.tar.gz
packaging/nnfw.spec
packaging/oourafft.tar.gz [deleted file]
packaging/ruy.tar.gz [deleted file]
res/CircleSchema/0.3/circle_schema.fbs [new file with mode: 0644]
res/CircleSchema/0.4/circle_schema.fbs [new file with mode: 0644]
res/PyTorchExamples/examples/BatchToSpaceND/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/Conv2d-pad/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/LSTM-bi/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/LSTM-nobias/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/LSTM-noinit/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/MaxPool2d-am/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/PixelShuffle/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/RNN-bi/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/RNN-nobias/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/RNN-noinit/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/RNN-relu/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/SpaceToBatchND/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/SpaceToDepth/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/clamp/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/interpolate/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/normalize/__init__.py [new file with mode: 0644]
res/PyTorchExamples/examples/strided_slice/__init__.py [new file with mode: 0644]
res/PyTorchExamples/ptem.py
res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Gather_000/test.recipe
res/TensorFlowLiteRecipes/Gather_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Gather_001/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/Net_Conv_QuantDequant_000/test.recipe
res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Add_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quantize_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Quantize_001/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/SVDF_000/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/SVDF_000/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/SVDF_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/SVDF_001/test.reverse [new file with mode: 0644]
res/TensorFlowLiteRecipes/SignatureDef_MultiOut_000/test.recipe
res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/Sqrt_000/test.recipe
res/TensorFlowLiteSchema/2.7.0/schema.fbs [new file with mode: 0644]
res/TensorFlowLiteSchema/2.8.0/schema.fbs [new file with mode: 0644]
runtime/contrib/android/api/build.gradle
runtime/contrib/android_benchmark_app/CMakeLists.txt
runtime/onert/api/CMakeLists.txt
runtime/onert/api/include/nnfw.h
runtime/onert/api/include/nnfw_version.h
runtime/onert/api/src/nnfw_api.cc
runtime/onert/api/src/nnfw_api_internal.cc
runtime/onert/backend/CMakeLists.txt
runtime/onert/backend/acl_cl/BackendContext.cc [deleted file]
runtime/onert/backend/acl_cl/BackendContext.h
runtime/onert/backend/acl_cl/ConstantInitializer.cc
runtime/onert/backend/acl_cl/Optimizer.cc
runtime/onert/backend/acl_common/AclBackendContext.h [new file with mode: 0644]
runtime/onert/backend/acl_common/AclConstantInitializer.h
runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h
runtime/onert/backend/acl_common/AclTensorBuilder.h
runtime/onert/backend/acl_common/CMakeLists.txt
runtime/onert/backend/acl_common/ParentInfo.h [deleted file]
runtime/onert/backend/acl_neon/BackendContext.cc [deleted file]
runtime/onert/backend/acl_neon/BackendContext.h
runtime/onert/backend/acl_neon/ConstantInitializer.cc
runtime/onert/backend/acl_neon/Optimizer.cc
runtime/onert/backend/cl_common/CMakeLists.txt [new file with mode: 0644]
runtime/onert/backend/cl_common/include/cl_common/BackendContext.h [new file with mode: 0644]
runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h [new file with mode: 0644]
runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h [new file with mode: 0644]
runtime/onert/backend/cl_common/src/LifetimeMap.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/OperationUtils.cc
runtime/onert/backend/cpu/ops/OperationUtils.h
runtime/onert/backend/gpu_cl/Backend.h
runtime/onert/backend/gpu_cl/BackendContext.cc
runtime/onert/backend/gpu_cl/BackendContext.h
runtime/onert/backend/gpu_cl/CMakeLists.txt
runtime/onert/backend/gpu_cl/ClConstantInitializer.cc
runtime/onert/backend/gpu_cl/ClConstantInitializer.h
runtime/onert/backend/gpu_cl/ClFunction.h
runtime/onert/backend/gpu_cl/ClMemoryManager.h [deleted file]
runtime/onert/backend/gpu_cl/ClTensorBuilder.h [deleted file]
runtime/onert/backend/gpu_cl/ClTensorManager.h [deleted file]
runtime/onert/backend/gpu_cl/ClTensorRegistry.h [deleted file]
runtime/onert/backend/gpu_cl/Config.cc
runtime/onert/backend/gpu_cl/Config.h
runtime/onert/backend/gpu_cl/KernelGenerator.cc
runtime/onert/backend/gpu_cl/KernelGenerator.h
runtime/onert/backend/gpu_cl/MemoryManager.h [new file with mode: 0644]
runtime/onert/backend/gpu_cl/ParentInfo.h [deleted file]
runtime/onert/backend/gpu_cl/TensorBuilder.cc [new file with mode: 0644]
runtime/onert/backend/gpu_cl/TensorBuilder.h
runtime/onert/backend/gpu_cl/TensorBuilderHelper.h [new file with mode: 0644]
runtime/onert/backend/gpu_cl/TensorManager.cc [new file with mode: 0644]
runtime/onert/backend/gpu_cl/TensorManager.h
runtime/onert/backend/gpu_cl/TensorRegistry.h [new file with mode: 0644]
runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h [new file with mode: 0644]
runtime/onert/backend/gpu_cl/open_cl/AccessType.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Api.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Api.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Arguments.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Arguments.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Buffer.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Buffer.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClContext.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClContext.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClDevice.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClErrors.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClEvent.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClKernel.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClMemory.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ClProgram.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/DataType.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/DataType.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Environment.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Environment.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/GpuObject.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Model.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ModelHints.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Operations.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Operations.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Precision.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Precision.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Shape.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Shape.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Spi.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Status.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Tensor.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Tensor.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/TensorType.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/TensorType.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Texture2d.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Types.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Util.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/Util.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h [deleted file]
runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc [deleted file]
runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h [deleted file]
runtime/onert/backend/gpu_cl/operand/CLTensor.cc
runtime/onert/backend/gpu_cl/operand/CLTensor.h
runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
runtime/onert/backend/gpu_cl/operand/ICLTensor.h
runtime/onert/backend/ruy/ops/OperationUtils.h
runtime/onert/backend/trix/Backend.h [new file with mode: 0644]
runtime/onert/backend/trix/BackendContext.cc [new file with mode: 0644]
runtime/onert/backend/trix/BackendContext.h [new file with mode: 0644]
runtime/onert/backend/trix/CMakeLists.txt [new file with mode: 0644]
runtime/onert/backend/trix/Config.cc [new file with mode: 0644]
runtime/onert/backend/trix/Config.h [new file with mode: 0644]
runtime/onert/backend/trix/DevContext.h [new file with mode: 0644]
runtime/onert/backend/trix/KernelGenerator.cc [new file with mode: 0644]
runtime/onert/backend/trix/KernelGenerator.h [new file with mode: 0644]
runtime/onert/backend/trix/Tensor.h [new file with mode: 0644]
runtime/onert/backend/trix/TensorBuilder.h [new file with mode: 0644]
runtime/onert/backend/trix/ops/BulkLayer.cc [new file with mode: 0644]
runtime/onert/backend/trix/ops/BulkLayer.h [new file with mode: 0644]
runtime/onert/backend/trix/trix.cc [new file with mode: 0644]
runtime/onert/backend/xnnpack/ops/OperationUtils.h
runtime/onert/core/include/compiler/LoweredGraph.h
runtime/onert/core/include/ir/DataType.h
runtime/onert/core/include/ir/Operations.Include.h
runtime/onert/core/include/ir/Operations.lst
runtime/onert/core/include/ir/operation/Bulk.h [new file with mode: 0644]
runtime/onert/core/include/util/CalculateActivationRange.h [new file with mode: 0644]
runtime/onert/core/src/compiler/Compiler.cc
runtime/onert/core/src/compiler/ExecutorFactory.cc
runtime/onert/core/src/compiler/ExecutorFactory.h
runtime/onert/core/src/compiler/LoweredGraph.cc
runtime/onert/core/src/exec/IPermuteFunction.h
runtime/onert/core/src/ir/DataType.cc
runtime/onert/core/src/ir/OperationDumper.cc
runtime/onert/core/src/ir/operation/AddN.cc
runtime/onert/core/src/ir/operation/BCQFullyConnected.cc
runtime/onert/core/src/ir/operation/BCQGather.cc
runtime/onert/core/src/ir/operation/BatchToSpaceND.cc
runtime/onert/core/src/ir/operation/BinaryArithmetic.cc
runtime/onert/core/src/ir/operation/BroadcastTo.cc
runtime/onert/core/src/ir/operation/Bulk.cc [new file with mode: 0644]
runtime/onert/core/src/ir/operation/Comparison.cc
runtime/onert/core/src/ir/operation/Concat.cc
runtime/onert/core/src/ir/operation/Conv2D.cc
runtime/onert/core/src/ir/operation/ConvertFp16ToFp32.cc
runtime/onert/core/src/ir/operation/ConvertFp32ToFp16.cc
runtime/onert/core/src/ir/operation/DepthToSpace.cc
runtime/onert/core/src/ir/operation/DepthwiseConv2D.cc
runtime/onert/core/src/ir/operation/ElementwiseActivation.cc
runtime/onert/core/src/ir/operation/ElementwiseBinary.cc
runtime/onert/core/src/ir/operation/ElementwiseUnary.cc
runtime/onert/core/src/ir/operation/EmbeddingLookup.cc
runtime/onert/core/src/ir/operation/ExpandDims.cc
runtime/onert/core/src/ir/operation/Fill.cc
runtime/onert/core/src/ir/operation/FullyConnected.cc
runtime/onert/core/src/ir/operation/Gather.cc
runtime/onert/core/src/ir/operation/HashtableLookup.cc
runtime/onert/core/src/ir/operation/InstanceNorm.cc
runtime/onert/core/src/ir/operation/L2Normalization.cc
runtime/onert/core/src/ir/operation/LSTM.cc
runtime/onert/core/src/ir/operation/LocalResponseNormalization.cc
runtime/onert/core/src/ir/operation/LogSoftmax.cc
runtime/onert/core/src/ir/operation/MatrixBandPart.cc
runtime/onert/core/src/ir/operation/PReLU.cc
runtime/onert/core/src/ir/operation/Permute.cc
runtime/onert/core/src/ir/operation/Pool2D.cc
runtime/onert/core/src/ir/operation/Pow.cc
runtime/onert/core/src/ir/operation/RNN.cc
runtime/onert/core/src/ir/operation/Range.cc
runtime/onert/core/src/ir/operation/Rank.cc
runtime/onert/core/src/ir/operation/Reduce.cc
runtime/onert/core/src/ir/operation/Reshape.cc
runtime/onert/core/src/ir/operation/ResizeBilinear.cc
runtime/onert/core/src/ir/operation/ResizeNearestNeighbor.cc
runtime/onert/core/src/ir/operation/Reverse.cc
runtime/onert/core/src/ir/operation/Shape.cc
runtime/onert/core/src/ir/operation/Softmax.cc
runtime/onert/core/src/ir/operation/SpaceToBatchND.cc
runtime/onert/core/src/ir/operation/SpaceToDepth.cc
runtime/onert/core/src/ir/operation/Split.cc
runtime/onert/core/src/ir/operation/SplitV.cc
runtime/onert/core/src/ir/operation/SquaredDifference.cc
runtime/onert/core/src/ir/operation/StatelessRandomUniform.cc
runtime/onert/core/src/ir/operation/StridedSlice.cc
runtime/onert/core/src/ir/operation/Tile.cc
runtime/onert/core/src/ir/operation/TopKV2.cc
runtime/onert/core/src/ir/operation/Transpose.cc
runtime/onert/core/src/ir/operation/TransposeConv.cc
runtime/onert/core/src/ir/operation/Unpack.cc
runtime/onert/core/src/ir/operation/While.cc
runtime/onert/frontend/base_loader/include/base_loader.h
runtime/onert/frontend/circle/src/circle_loader.cc
runtime/onert/frontend/circle_schema/include/circle_schema_generated.h
runtime/onert/frontend/nnapi/wrapper/OperationFactory.cc
runtime/onert/frontend/tflite/src/tflite_loader.cc
runtime/onert/frontend/tflite/src/tflite_schema_generated.h
runtime/onert/frontend/trix/CMakeLists.txt [new file with mode: 0644]
runtime/onert/frontend/trix/include/trix_loader.h [new file with mode: 0644]
runtime/onert/frontend/trix/src/trix_loader.cc [new file with mode: 0644]
runtime/onert/frontend/trix/src/trix_loader_dummy.cc [new file with mode: 0644]
tests/nnapi/specs/V1_2/equal_broadcast_float_nnfw.mod.py
tests/nnfw_api/src/CircleGen.cc
tests/nnfw_api/src/CircleGen.h
tests/nnfw_api/src/GenModelTests.cc
tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/Equal.cc
tests/tools/nnpackage_run/CMakeLists.txt
tests/tools/nnpackage_run/src/args.cc
tests/tools/nnpackage_run/src/args.h
tests/tools/nnpackage_run/src/formatter.h [new file with mode: 0644]
tests/tools/nnpackage_run/src/h5formatter.cc
tests/tools/nnpackage_run/src/h5formatter.h
tests/tools/nnpackage_run/src/nnfw_util.cc
tests/tools/nnpackage_run/src/nnpackage_run.cc
tests/tools/nnpackage_run/src/randomgen.cc
tests/tools/nnpackage_run/src/rawformatter.cc [new file with mode: 0644]
tests/tools/nnpackage_run/src/rawformatter.h [new file with mode: 0644]
tests/tools/tflite_comparator/src/tflite_comparator.cc
tools/cross/install_rootfs.sh
tools/nnpackage_tool/model2nnpkg/README.md
tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh
tools/tflitefile_tool/config_saver.py [deleted file]
tools/tflitefile_tool/graph_stats.py [deleted file]
tools/tflitefile_tool/ir/README.md [new file with mode: 0644]
tools/tflitefile_tool/ir/__init__.py [new file with mode: 0644]
tools/tflitefile_tool/ir/graph_stats.py [new file with mode: 0755]
tools/tflitefile_tool/ir/operator.py [new file with mode: 0644]
tools/tflitefile_tool/ir/subgraph.py [new file with mode: 0644]
tools/tflitefile_tool/ir/tensor.py [new file with mode: 0644]
tools/tflitefile_tool/model_parser.py
tools/tflitefile_tool/model_saver.py [deleted file]
tools/tflitefile_tool/operation.py [deleted file]
tools/tflitefile_tool/operator_parser.py [deleted file]
tools/tflitefile_tool/operator_printer.py [deleted file]
tools/tflitefile_tool/operator_wrapping.py [deleted file]
tools/tflitefile_tool/option_printer.py [deleted file]
tools/tflitefile_tool/parser/__init__.py [new file with mode: 0644]
tools/tflitefile_tool/parser/model_parser.py [new file with mode: 0755]
tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py [new file with mode: 0644]
tools/tflitefile_tool/parser/tflite/tflite_operator.py [new file with mode: 0755]
tools/tflitefile_tool/parser/tflite/tflite_option.py [new file with mode: 0644]
tools/tflitefile_tool/parser/tflite/tflite_parser.py [new file with mode: 0755]
tools/tflitefile_tool/parser/tflite/tflite_subgraph.py [new file with mode: 0755]
tools/tflitefile_tool/parser/tflite/tflite_tensor.py [new file with mode: 0755]
tools/tflitefile_tool/printer/__init__.py [new file with mode: 0644]
tools/tflitefile_tool/printer/string_builder.py [new file with mode: 0644]
tools/tflitefile_tool/printer/subgraph_printer.py [new file with mode: 0755]
tools/tflitefile_tool/saver/__init__.py [new file with mode: 0644]
tools/tflitefile_tool/saver/config_saver.py [new file with mode: 0755]
tools/tflitefile_tool/saver/model_saver.py [new file with mode: 0755]
tools/tflitefile_tool/select_operator.py
tools/tflitefile_tool/subgraph_printer.py [deleted file]
tools/tflitefile_tool/tensor_printer.py [deleted file]
tools/tflitefile_tool/tensor_wrapping.py [deleted file]
tools/tflitefile_tool/tests/README.md [new file with mode: 0644]
tools/tflitefile_tool/tests/__init__.py [new file with mode: 0644]
tools/tflitefile_tool/tests/main.py [new file with mode: 0644]
tools/tflitefile_tool/tests/test_operator.py [new file with mode: 0644]
tools/tflitefile_tool/tests/test_setup.py [new file with mode: 0644]
tools/tflitefile_tool/tests/test_string_builder.py [new file with mode: 0644]
tools/tflitefile_tool/tests/test_subgraph.py [new file with mode: 0644]
tools/tflitefile_tool/tests/test_tensor.py [new file with mode: 0644]
tools/tflitefile_tool/tests/test_tflite_parser.py [new file with mode: 0644]

index 40635d443f8b84af614be91de5d23407bb3cd96d..95e11d0f93659dcae35f3fe71c8bedf0f002c236 100644 (file)
@@ -102,13 +102,15 @@ test:
     testCaseFolder:
       - /compiler/angkor
       - /compiler/arser
-      - /compiler/circle2circle
+      - /compiler/circle-partitioner
       - /compiler/circle-quantizer
       - /compiler/circle-tensordump
+      - /compiler/circle2circle
       - /compiler/circlechef
       - /compiler/circledump
       - /compiler/crew
       - /compiler/cwrap
+      - /compiler/dio-hdf5
       - /compiler/foder
       - /compiler/hermes
       - /compiler/hermes-std
@@ -122,13 +124,16 @@ test:
       - /compiler/luci-eval-driver
       - /compiler/luci-pass-value-test
       - /compiler/luci-value-test
-      - /compiler/mio-circle
+      - /compiler/mio-circle04
       - /compiler/mio-tflite
+      - /compiler/mio-tflite260
       - /compiler/oops
       - /compiler/pepper-assert
+      - /compiler/pepper-csv2vec
       - /compiler/pepper-str
       - /compiler/pepper-strcast
       - /compiler/pp
+      - /compiler/rawdata2hdf5
       - /compiler/record-minmax
       - /compiler/safemain
       - /compiler/souschef
index d36985416d57d362c1592b7cb567e6d6cd3e9b3b..3ef12efd78d946daf1f41eb786d702923fd3ea91 100644 (file)
@@ -1,2 +1,16 @@
-tests/nnapi/specs/* linguist-detectable=false
-res/* linguist-detectable=false
+# Exclude from git language statistics
+tests/nnapi/specs/** linguist-detectable=false
+res/** linguist-detectable=false
+
+# Default: text file
+# - Set End-Of-Line type
+* text eol=lf
+
+# Binary - ignore text file setting
+*.caffemodel -text
+*.png -text
+*.pdf -text
+*.h5 -text
+*.tar.gz -text
+*.tflite -text
+*.bmp -text
index 44b5e90584a26c6e685a6c762119f2475453b742..7f5cb88c2b6163e86072d12b64b00bb02e76119f 100644 (file)
@@ -5,7 +5,9 @@ list(REMOVE_ITEM SOURCES ${TESTS})
 
 # NOTE STATIC is deliberately used here to allow clients to use 'angkor' without installation
 add_library(angkor STATIC ${HEADERS} ${SOURCES})
-set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(angkor PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif (NOT NNCC_LIBRARY_NO_PIC)
 set_target_properties(angkor PROPERTIES LINKER_LANGUAGE CXX)
 target_include_directories(angkor PUBLIC include)
 target_link_libraries(angkor PRIVATE nncc_common)
index 4e88f0cb7e46496b4fa7e3cce5dda0db36abb003..63121b84590d3be8ca4d9de581be5fe5abea1313 100644 (file)
 
 #include "arser/arser.h"
 
-using namespace arser;
+#include "Prompt.h"
 
-class Prompt
-{
-public:
-  Prompt(const std::string &command)
-  {
-    std::istringstream iss(command);
-    std::vector<std::string> token(std::istream_iterator<std::string>{iss},
-                                   std::istream_iterator<std::string>());
-    _arg = std::move(token);
-    _argv.reserve(_arg.size());
-    for (const auto &t : _arg)
-    {
-      _argv.push_back(const_cast<char *>(t.data()));
-    }
-  }
-  int argc(void) const { return _argv.size(); }
-  char **argv(void) { return _argv.data(); }
-
-private:
-  std::vector<char *> _argv;
-  std::vector<std::string> _arg;
-};
+using namespace arser;
 
 TEST(BasicTest, option)
 {
@@ -57,7 +36,7 @@ TEST(BasicTest, option)
     .nargs(0)
     .help("It provides additional details as to what the executable is doing");
 
-  Prompt prompt("./executable --verbose");
+  test::Prompt prompt("./executable --verbose");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -79,7 +58,7 @@ TEST(BasicTest, OptionalArgument)
     .type(arser::DataType::FLOAT)
     .help("Set a frequency as you provided.");
 
-  Prompt prompt("./radio --volume 5 --frequency 128.5");
+  test::Prompt prompt("./radio --volume 5 --frequency 128.5");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -103,7 +82,7 @@ TEST(BasicTest, NonRequiredOptionalArgument_NEG)
     .type(arser::DataType::INT32)
     .help("Set a volume as you provided.");
 
-  Prompt prompt("./radio"); // empty argument
+  test::Prompt prompt("./radio"); // empty argument
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -122,7 +101,7 @@ TEST(BasicTest, RequiredOptionalArgument_NEG)
     .required()
     .help("Set a volume as you provided.");
 
-  Prompt prompt("./radio");
+  test::Prompt prompt("./radio");
   /* act */ /* assert */
   EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
 }
@@ -134,7 +113,7 @@ TEST(BasicTest, OptionalMultipleArgument)
 
   arser.add_argument("--add").nargs(2).type(arser::DataType::INT32_VEC).help("Add two numbers.");
 
-  Prompt prompt("./calculator --add 3 5");
+  test::Prompt prompt("./calculator --add 3 5");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -167,8 +146,8 @@ TEST(BasicTest, MultipleOptionalArgument)
     .help("give traning data to this program.")
     .required();
 
-  Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
-                "--training_data 2 43 234 3 334");
+  test::Prompt prompt("./ml --input_path /I/am/in.put --output_path I/am/out.put "
+                      "--training_data 2 43 234 3 334");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -195,7 +174,7 @@ TEST(BasicTest, MultipleFloatValue)
     .type(arser::DataType::FLOAT_VEC)
     .help("Add two float numbers.");
 
-  Prompt prompt("./calculator --add_float 3.2 5.4");
+  test::Prompt prompt("./calculator --add_float 3.2 5.4");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -217,7 +196,7 @@ TEST(BasicTest, MultipleStringValue)
     .type(arser::DataType::STR_VEC)
     .help("insert your three favorite color");
 
-  Prompt prompt("./color_factory --three_color red blue yellow");
+  test::Prompt prompt("./color_factory --three_color red blue yellow");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -241,7 +220,7 @@ TEST(BasicTest, ExitWithFunctionCall)
 
   arser.add_argument("--name").nargs(1).type(arser::DataType::STR).help("Name your hero");
 
-  Prompt prompt("./hero --history");
+  test::Prompt prompt("./hero --history");
   /* act */ /* assert */
   EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0),
               "When I was young..");
@@ -258,7 +237,7 @@ TEST(BasicTest, ExitWithFunctionCallWithBind)
     .help("Show version and exit")
     .exit_with(std::bind(printVersion, "1.2.0"));
 
-  Prompt prompt("./arser --version");
+  test::Prompt prompt("./arser --version");
   /* act */ /* assert */
   EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0),
               "arser version : 1.2.0");
@@ -275,7 +254,7 @@ TEST(BasicTest, ExitWithFunctionCallWithLamda)
 
   arser.add_argument("OS").nargs(1).type(arser::DataType::STR).help("The OS you want to boot");
 
-  Prompt prompt("./computer --shutdown");
+  test::Prompt prompt("./computer --shutdown");
   /* act */ /* assert */
   EXPECT_EXIT(arser.parse(prompt.argc(), prompt.argv()), testing::ExitedWithCode(0), "Good bye..");
 }
@@ -315,7 +294,7 @@ TEST(BasicTest, DefaultValue)
     .default_value("no name")
     .help("Enter your name");
 
-  Prompt prompt("/phone --time 1 52 34 --name arser");
+  test::Prompt prompt("/phone --time 1 52 34 --name arser");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -359,7 +338,7 @@ TEST(BasicTest, shortOption)
     .help("output path of this program.")
     .required(true);
 
-  Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
+  test::Prompt prompt("./driver -i /I/am/in.put --output_path I/am/out.put");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -385,7 +364,7 @@ TEST(BasicTest, shortMultipleOption)
     .help("output path of this program.")
     .required(true);
 
-  Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
+  test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -411,7 +390,7 @@ TEST(BasicTest, OptWithRequiredDuplicate_NEG)
     .help("output path of this program.")
     .required(true);
 
-  Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+  test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
   /* act */ /* assert */
   EXPECT_THROW(arser.parse(prompt.argc(), prompt.argv()), std::runtime_error);
 }
@@ -432,7 +411,7 @@ TEST(BasicTest, OptWithNonRequiredDuplicate)
     .help("output path of this program.")
     .required(true);
 
-  Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
+  test::Prompt prompt("./driver --in /I/am/in.put -o I/am/out.put -i /I/am/duplicate");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -449,7 +428,7 @@ TEST(BasicTest, AccumulateVectorOptions)
 
   arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC);
 
-  Prompt prompt("./driver --specify a b c --specify 1 2 3");
+  test::Prompt prompt("./driver --specify a b c --specify 1 2 3");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -473,7 +452,7 @@ TEST(BasicTest, AccumulateScalarOptions)
 
   arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
 
-  Prompt prompt("./driver --specify 1 --specify 2");
+  test::Prompt prompt("./driver --specify 1 --specify 2");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
@@ -491,7 +470,7 @@ TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG)
 
   arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
 
-  Prompt prompt("./driver --specify 1 --specify 2");
+  test::Prompt prompt("./driver --specify 1 --specify 2");
   /* act */
   arser.parse(prompt.argc(), prompt.argv());
   /* assert */
diff --git a/compiler/circle-eval-diff/CMakeLists.txt b/compiler/circle-eval-diff/CMakeLists.txt
new file mode 100644 (file)
index 0000000..4d86f80
--- /dev/null
@@ -0,0 +1,34 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-eval-diff ${DRIVER} ${SOURCES})
+target_include_directories(circle-eval-diff PRIVATE include)
+
+target_link_libraries(circle-eval-diff arser)
+target_link_libraries(circle-eval-diff safemain)
+target_link_libraries(circle-eval-diff foder)
+target_link_libraries(circle-eval-diff loco)
+target_link_libraries(circle-eval-diff luci_import)
+target_link_libraries(circle-eval-diff luci_lang)
+target_link_libraries(circle-eval-diff luci_interpreter)
+target_link_libraries(circle-eval-diff dio_hdf5)
+target_link_libraries(circle-eval-diff vconone)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+# circle-eval-diff is executable, so we do not link it to the test.
+# Instead, we use TEST_SOURCES to specify sources uesd for tests.
+set(TEST_SOURCES
+    "src/MetricPrinter.cpp"
+    "src/Tensor.cpp")
+
+nnas_find_package(GTest REQUIRED)
+GTest_AddTest(circle_eval_diff_test ${TESTS} ${TEST_SOURCES})
+target_include_directories(circle_eval_diff_test PRIVATE src)
+target_link_libraries(circle_eval_diff_test luci_testhelper)
+target_link_libraries(circle_eval_diff_test nncc_coverage)
diff --git a/compiler/circle-eval-diff/README.md b/compiler/circle-eval-diff/README.md
new file mode 100644 (file)
index 0000000..a3727cc
--- /dev/null
@@ -0,0 +1,51 @@
+# circle-eval-diff
+
+_circle-eval-diff_ compares inference results of two circle models.
+
+## Use cases
+
+1. _circle-eval-diff_ can be used to evaluate reconstruction errors of quantized models.
+2. _circle-eval-diff_ can be used to verify optimization (or any kind of value-preserving conversion) is safe.
+
+## Usage
+
+Run circle-eval-diff with the following arguments.
+
+--first_input_model: first model to compare (.circle).
+
+--second_input_model: second model to compare (.circle).
+
+--first_input_data: input data for the first model (.h5, directory). Random data will be used if this argument is not given.
+
+--second_input_data: input data for the second model (.h5, directory). Random data will be used if this argument is not given.
+
+--input_data_format: input data format (h5 (default), directory).
+
+--metric: metric to compare inference results (MAE (default), etc).
+
+```
+$ ./circle-eval-diff
+  --first_input_model <first_input_model>
+  --second_input_model <second_input_model>
+  --first_input_data <first_input_data>
+  --second_input_data <second_input_data>
+  --input_data_format <data_format>
+  --metric <metric>
+```
+
+For example,
+```
+$ ./circle-eval-diff
+  --first_input_model A.circle
+  --second_input_model B.circle
+  --first_input_data A.h5
+  --second_input_data B.h5
+  --input_data_format h5
+  --metric MAE
+```
+
+It will print MAE (Mean Absolute Error) between the inference result of A.circle with A.h5 and that of B.circle with B.h5.
+
+## Note
+
+Circle models are executed by _luci-interpreter_.
diff --git a/compiler/circle-eval-diff/driver/Driver.cpp b/compiler/circle-eval-diff/driver/Driver.cpp
new file mode 100644 (file)
index 0000000..f4a12a4
--- /dev/null
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+using namespace circle_eval_diff;
+
+namespace
+{
+
+std::string to_lower_case(std::string s)
+{
+  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
+  return s;
+}
+
+Metric to_metric(const std::string &str)
+{
+  if (to_lower_case(str).compare("mae") == 0)
+    return Metric::MAE;
+
+  throw std::runtime_error("Unsupported metric.");
+}
+
+InputFormat to_input_format(const std::string &str)
+{
+  if (to_lower_case(str).compare("h5") == 0)
+    return InputFormat::H5;
+
+  throw std::runtime_error("Unsupported input format.");
+}
+
+void print_version(void)
+{
+  std::cout << "circle-eval-diff version " << vconone::get_string() << std::endl;
+  std::cout << vconone::get_copyright() << std::endl;
+}
+
+} // namespace
+
+int entry(const int argc, char **argv)
+{
+  arser::Arser arser("Compare inference results of two circle models");
+
+  arser.add_argument("--version")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("Show version information and exit")
+    .exit_with(print_version);
+
+  arser.add_argument("--first_model")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(true)
+    .help("First input model filepath");
+
+  arser.add_argument("--second_model")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(true)
+    .help("Second input model filepath");
+
+  arser.add_argument("--first_input_data")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .help("Input data filepath for the first model. If not given, circle-eval-diff will run with "
+          "randomly generated data");
+
+  arser.add_argument("--second_input_data")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .help("Input data filepath for the second model. If not given, circle-eval-diff will run with "
+          "randomly generated data");
+
+  arser.add_argument("--metric")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .default_value("MAE")
+    .help("Metric for comparison (default: MAE)");
+
+  arser.add_argument("--input_data_format")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .default_value("h5")
+    .help("Input data format. h5/hdf5 (default) or directory");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cout << err.what() << std::endl;
+    std::cout << arser;
+    return 255;
+  }
+
+  const auto first_model_path = arser.get<std::string>("--first_model");
+  const auto second_model_path = arser.get<std::string>("--second_model");
+
+  // Default values
+  std::string first_input_data_path;
+  std::string second_input_data_path;
+  std::string metric;
+  std::string input_data_format;
+
+  if (arser["--first_input_data"])
+    first_input_data_path = arser.get<std::string>("--first_input_data");
+
+  if (arser["--second_input_data"])
+    second_input_data_path = arser.get<std::string>("--second_input_data");
+
+  if (arser["--first_input_data"] != arser["--second_input_data"])
+    throw std::runtime_error("Input data path should be given for both first_model and "
+                             "second_model, or neither must be given.");
+
+  metric = arser.get<std::string>("--metric");
+  input_data_format = arser.get<std::string>("--input_data_format");
+
+  auto ctx = std::make_unique<CircleEvalDiff::Context>();
+  {
+    ctx->first_model_path = first_model_path;
+    ctx->second_model_path = second_model_path;
+    ctx->metric = to_metric(metric);
+    ctx->input_format = to_input_format(input_data_format);
+  }
+
+  CircleEvalDiff ced(std::move(ctx));
+
+  ced.init();
+
+  ced.evalDiff(first_input_data_path, second_input_data_path);
+
+  return EXIT_SUCCESS;
+}
diff --git a/compiler/circle-eval-diff/include/CircleEvalDiff.h b/compiler/circle-eval-diff/include/CircleEvalDiff.h
new file mode 100644 (file)
index 0000000..bf6aff4
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_H__
+#define __CIRCLE_EVAL_DIFF_H__
+
+#include <luci/IR/Module.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <string>
+#include <memory>
+
+namespace circle_eval_diff
+{
+
+// Forward declaration
+class ModuleEvalDiff;
+
+enum class Metric
+{
+  Undefined, // For debugging
+  MAE,
+};
+
+enum class InputFormat
+{
+  Undefined, // For debugging
+  H5,
+  // TODO Implement Random, Directory
+};
+
+class CircleEvalDiff final
+{
+public:
+  struct Context
+  {
+    std::string first_model_path;
+    std::string second_model_path;
+    Metric metric = Metric::Undefined;
+    InputFormat input_format = InputFormat::Undefined;
+  };
+
+public:
+  CircleEvalDiff(std::unique_ptr<Context> &&ctx);
+
+  ~CircleEvalDiff();
+
+  void init();
+
+  // Evaluate two circle models for the given input data and compare the results
+  void evalDiff(const std::string &first_input_data_path,
+                const std::string &second_input_data_path) const;
+
+private:
+  std::unique_ptr<Context> _ctx;
+  std::unique_ptr<ModuleEvalDiff> _runner;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/requires.cmake b/compiler/circle-eval-diff/requires.cmake
new file mode 100644 (file)
index 0000000..cae9b7c
--- /dev/null
@@ -0,0 +1,7 @@
+require("loco")
+require("luci")
+require("luci-interpreter")
+require("dio-hdf5")
+require("safemain")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-eval-diff/src/CircleEvalDiff.cpp b/compiler/circle-eval-diff/src/CircleEvalDiff.cpp
new file mode 100644 (file)
index 0000000..c39a113
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleEvalDiff.h"
+#include "ModuleEvalDiff.h"
+#include "MetricPrinter.h"
+
+#include <foder/FileLoader.h>
+#include <luci/Importer.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+std::unique_ptr<luci::Module> import(const std::string &model_path)
+{
+  // Load model from the file
+  foder::FileLoader loader{model_path};
+  std::vector<char> model_data = loader.load();
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model_data.data()),
+                                 model_data.size()};
+  if (not circle::VerifyModelBuffer(verifier))
+  {
+    throw std::runtime_error("Failed to verify circle '" + model_path + "'");
+  }
+
+  auto module = luci::Importer().importModule(circle::GetModel(model_data.data()));
+
+  if (not module)
+    throw std::runtime_error("Failed to load '" + model_path + "'");
+
+  return module;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+CircleEvalDiff::CircleEvalDiff(std::unique_ptr<Context> &&ctx)
+  : _ctx(std::move(ctx)), _runner(nullptr)
+{
+}
+
+CircleEvalDiff::~CircleEvalDiff() = default;
+
+void CircleEvalDiff::init()
+{
+  // Set metric
+  std::unique_ptr<MetricPrinter> metric;
+  switch (_ctx->metric)
+  {
+    case Metric::MAE:
+      metric = std::make_unique<MAEPrinter>();
+      break;
+    default:
+      throw std::runtime_error("Unsupported metric.");
+  }
+
+  auto first_module = import(_ctx->first_model_path);
+  auto second_module = import(_ctx->second_model_path);
+
+  // Set runner
+  switch (_ctx->input_format)
+  {
+    case InputFormat::H5:
+      _runner = std::make_unique<H5InputEvalDiff>(std::move(first_module), std::move(second_module),
+                                                  std::move(metric));
+      break;
+    default:
+      throw std::runtime_error("Unsupported input format.");
+  }
+}
+
+void CircleEvalDiff::evalDiff(const std::string &first_input_data_path,
+                              const std::string &second_input_data_path) const
+{
+  _runner->evalDiff(first_input_data_path, second_input_data_path);
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.cpp b/compiler/circle-eval-diff/src/MetricPrinter.cpp
new file mode 100644 (file)
index 0000000..d65eb9b
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <iostream>
+#include <cassert>
+
+using Tensor = circle_eval_diff::Tensor;
+
+#define THROW_UNLESS(COND, MSG) \
+  if (not(COND))                \
+    throw std::runtime_error(MSG);
+
+namespace
+{
+
+template <typename T> bool same_shape(const T a, const T b)
+{
+  if (a->rank() != b->rank())
+    return false;
+
+  for (uint32_t i = 0; i < a->rank(); i++)
+  {
+    if (not(a->dim(i) == b->dim(i)))
+      return false;
+  }
+
+  return true;
+}
+
+template <loco::DataType DT> std::shared_ptr<Tensor> to_fp32(const std::shared_ptr<Tensor> &tensor)
+{
+  assert(tensor->dtype() == DT); // FIX_CALLER_UNLESS
+
+  auto fp32_tensor = std::make_shared<Tensor>();
+  {
+    fp32_tensor->dtype(loco::DataType::FLOAT32);
+    fp32_tensor->rank(tensor->rank());
+    for (uint32_t i = 0; i < tensor->rank(); i++)
+      fp32_tensor->dim(i) = tensor->dim(i);
+
+    const auto num_elems = tensor->size<DT>();
+    fp32_tensor->size<loco::DataType::FLOAT32>(num_elems);
+    for (uint32_t i = 0; i < num_elems; i++)
+      fp32_tensor->at<loco::DataType::FLOAT32>(i) = static_cast<float>(tensor->at<DT>(i));
+  }
+  return fp32_tensor;
+}
+
+std::shared_ptr<Tensor> fp32(const std::shared_ptr<Tensor> &tensor)
+{
+  switch (tensor->dtype())
+  {
+    case loco::DataType::FLOAT32:
+      return tensor;
+    case loco::DataType::U8:
+      return to_fp32<loco::DataType::U8>(tensor);
+    case loco::DataType::S16:
+      return to_fp32<loco::DataType::S16>(tensor);
+    default:
+      throw std::runtime_error("Unsupported data type.");
+  }
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+void MAEPrinter::init(const luci::Module *first, const luci::Module *second)
+{
+  THROW_UNLESS(first != nullptr, "Invalid module.");
+  THROW_UNLESS(second != nullptr, "Invalid module.");
+
+  const auto first_output = loco::output_nodes(first->graph());
+  const auto second_output = loco::output_nodes(second->graph());
+
+  assert(first_output.size() == second_output.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < first_output.size(); i++)
+  {
+    const auto first_node = loco::must_cast<luci::CircleNode *>(first_output[i]);
+    const auto second_node = loco::must_cast<luci::CircleNode *>(second_output[i]);
+    assert(same_shape(first_node, second_node)); // FIX_CALLER_UNLESS
+
+    // Create tensors to store intermediate results
+    _intermediate.emplace_back();
+    _intermediate.at(i).dtype(loco::DataType::FLOAT32);
+    // NOTE Use both first_node and second_node to avoid release build break
+    _intermediate.at(i).rank(first_node->rank());
+    uint32_t num_elems = 1;
+    for (uint32_t j = 0; j < second_node->rank(); j++)
+    {
+      _intermediate.at(i).dim(j) = second_node->dim(j);
+      num_elems *= second_node->dim(j).value();
+    }
+    _intermediate.at(i).size<loco::DataType::FLOAT32>(num_elems);
+
+    // Check the buffer is initilized with zero
+    for (uint32_t j = 0; j < num_elems; j++)
+      assert(_intermediate.at(i).at<loco::DataType::FLOAT32>(j) == 0.0);
+
+    // Save output names for logging
+    _output_names.emplace_back(first_node->name());
+  }
+}
+
+void MAEPrinter::accum_absolute_error(uint32_t output_idx, const std::shared_ptr<Tensor> &a,
+                                      const std::shared_ptr<Tensor> &b)
+{
+  assert(a->dtype() == loco::DataType::FLOAT32 and
+         b->dtype() == loco::DataType::FLOAT32); // FIX_CALLER_UNLESS
+  assert(same_shape(a.get(), b.get()));          // FIX_CALLER_UNLESS
+  assert(output_idx < _intermediate.size());     // FIX_CALLER_UNLESS
+
+  for (uint32_t i = 0; i < a->size<loco::DataType::FLOAT32>(); i++)
+  {
+    _intermediate.at(output_idx).at<loco::DataType::FLOAT32>(i) +=
+      std::abs(a->at<loco::DataType::FLOAT32>(i) - b->at<loco::DataType::FLOAT32>(i));
+  }
+}
+
+void MAEPrinter::accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                            const std::vector<std::shared_ptr<Tensor>> &second)
+{
+  assert(first.size() == second.size());        // FIX_CALLER_UNLESS
+  assert(first.size() == _intermediate.size()); // FIX_CALLER_UNLESS
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto first_output = first[output_idx];
+    const auto second_output = second[output_idx];
+
+    // Cast data to fp32 and then compute absolute error
+    const auto fp32_first_output = fp32(first_output);
+    const auto fp32_second_output = fp32(second_output);
+
+    accum_absolute_error(output_idx, fp32_first_output, fp32_second_output);
+  }
+
+  _num_data++;
+}
+
+void MAEPrinter::dump(std::ostream &os) const
+{
+  os << "Mean Absolute Error (MAE)" << std::endl;
+
+  for (uint32_t output_idx = 0; output_idx < _intermediate.size(); output_idx++)
+  {
+    const auto name = _output_names.at(output_idx);
+    const auto &inter = _intermediate.at(output_idx);
+    assert(inter.dtype() == loco::DataType::FLOAT32); // FIX_ME_UNLESS
+    const auto elem_count = inter.size<loco::DataType::FLOAT32>();
+
+    // Compute MAE
+    float mae = 0.0;
+    for (uint32_t elem_idx = 0; elem_idx < elem_count; elem_idx++)
+      mae += inter.at<loco::DataType::FLOAT32>(elem_idx);
+
+    mae = mae / elem_count;
+    mae = mae / _num_data;
+
+    os << "MAE for " << name << " is " << mae << std::endl;
+  }
+}
+
+} // namespace circle_eval_diff
+
+#undef THROW_UNLESS
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.h b/compiler/circle-eval-diff/src/MetricPrinter.h
new file mode 100644 (file)
index 0000000..b51581c
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+#define __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
+
+#include <luci/IR/Module.h>
+
+#include "Tensor.h"
+
+#include <vector>
+#include <iostream>
+
+namespace circle_eval_diff
+{
+
+// Class to print metrics
+// How to use?
+//
+// MetricPrinter metric;
+// metric.init(first_module, second_module); // optional initialization
+//
+// for (..) // Evaluate data one by one
+// {
+//   ..
+//   metric.accumulate(first_result, second_result); // accumulate results
+// }
+//
+// std::cout << &metric << std::endl; // print result
+class MetricPrinter
+{
+public:
+  virtual ~MetricPrinter() = default;
+
+  // Child class can implement this function if necessary
+  // NOTE init can be skipped
+  virtual void init(const luci::Module *, const luci::Module *) {}
+
+  // Accumulate results of comparing the first and the second model's outputs
+  virtual void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                          const std::vector<std::shared_ptr<Tensor>> &second) = 0;
+
+  // Dump the final result of the corresponding metric
+  virtual void dump(std::ostream &os) const = 0;
+};
+
+static inline std::ostream &operator<<(std::ostream &os, const MetricPrinter *m)
+{
+  m->dump(os);
+  return os;
+}
+
+// Mean Absolute Error
+class MAEPrinter final : public MetricPrinter
+{
+public:
+  void init(const luci::Module *first, const luci::Module *second);
+
+  void accumulate(const std::vector<std::shared_ptr<Tensor>> &first,
+                  const std::vector<std::shared_ptr<Tensor>> &second);
+
+  void dump(std::ostream &os) const;
+
+private:
+  void accum_absolute_error(uint32_t index, const std::shared_ptr<Tensor> &a,
+                            const std::shared_ptr<Tensor> &b);
+
+private:
+  // Store accumulated sum of absolute error for each output
+  std::vector<Tensor> _intermediate;
+  std::vector<std::string> _output_names;
+  uint32_t _num_data = 0;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_METRIC_PRINTER_H__
diff --git a/compiler/circle-eval-diff/src/MetricPrinter.test.cpp b/compiler/circle-eval-diff/src/MetricPrinter.test.cpp
new file mode 100644 (file)
index 0000000..51ca897
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MetricPrinter.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+// TODO Reduce duplicate codes in ResolveCustomOpMatMulPass.cpp
+// Create a CircleConst node of the given dtype/shape, filled with `values`.
+// Supported dtypes: U8, S16, S32, FLOAT32; others raise INTERNAL_EXN.
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+                                     const std::vector<uint32_t> &shape,
+                                     const std::vector<T> &values)
+{
+  auto node = g->nodes()->create<luci::CircleConst>();
+  node->dtype(dtype);
+  node->rank(shape.size());
+
+  // Total number of elements (product of all dimensions)
+  uint32_t size = 1;
+  for (uint32_t i = 0; i < shape.size(); ++i)
+  {
+    node->dim(i) = shape.at(i);
+    size *= shape.at(i);
+  }
+  node->shape_status(luci::ShapeStatus::VALID);
+
+// Resize the const buffer and copy `values` in, element by element
+#define INIT_VALUES(DT)                          \
+  {                                              \
+    node->size<DT>(size);                        \
+    for (uint32_t i = 0; i < values.size(); ++i) \
+      node->at<DT>(i) = values[i];               \
+  }
+
+  switch (dtype)
+  {
+    case loco::DataType::U8:
+      INIT_VALUES(loco::DataType::U8);
+      break;
+    case loco::DataType::S16:
+      INIT_VALUES(loco::DataType::S16);
+      break;
+    case loco::DataType::S32:
+      INIT_VALUES(loco::DataType::S32);
+      break;
+    case loco::DataType::FLOAT32:
+      INIT_VALUES(loco::DataType::FLOAT32)
+      break;
+    default:
+      INTERNAL_EXN("create_const_node called with unsupported type");
+      break;
+  }
+  return node;
+}
+
+/**
+ *  Simple graph which adds constant (addition) to the input
+ *
+ *  [Input] [Const] (addition)
+ *      \   /
+ *      [Add]
+ *
+ */
+class AddGraphlet
+{
+public:
+  AddGraphlet() = default;
+
+  void init(loco::Graph *g, float addition)
+  {
+    std::vector<float> addition_val;
+    for (uint32_t i = 0; i < 16; i++)
+      addition_val.push_back(addition);
+    _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+    _add = g->nodes()->create<luci::CircleAdd>();
+    _add->y(_add_c);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _add->dtype(loco::DataType::FLOAT32);
+    _add->shape({1, 16});
+    _add->name("add");
+  }
+
+protected:
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_add_c = nullptr;
+};
+
+// Test graph: output = input + 1.0 (shapes: input {1, 4}, output {1, 16})
+class AddOneGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+  AddOneGraph() = default;
+
+  void init(void)
+  {
+    luci::test::TestIOGraph::init({1, 4}, {1, 16});
+    AddGraphlet::init(g(), 1.0);
+
+    _add->x(input());
+
+    output()->from(_add);
+  }
+
+  // Release ownership of the underlying graph to the caller
+  std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+// Test graph: output = input + 2.0 (shapes: input {1, 4}, output {1, 16})
+class AddTwoGraph : public luci::test::TestIOGraph, public AddGraphlet
+{
+public:
+  AddTwoGraph() = default;
+
+  void init(void)
+  {
+    luci::test::TestIOGraph::init({1, 4}, {1, 16});
+    AddGraphlet::init(g(), 2.0);
+
+    _add->x(input());
+
+    output()->from(_add);
+  }
+
+  // Release ownership of the underlying graph to the caller
+  std::unique_ptr<loco::Graph> graph(void) { return std::move(_g); }
+};
+
+// Return number of elements of the node (product of all dimensions).
+uint32_t numElements(const luci::CircleNode *node)
+{
+  uint32_t count = 1;
+  const auto rank = node->rank();
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    count *= node->dim(axis).value();
+  return count;
+}
+
+// Return Tensor which has the same dtype and shape with node.
+// Buffer does not have any data yet.
+// NOTE(review): the buffer is always sized as FLOAT32 regardless of
+// node->dtype() — fine for these float-only tests, but do not reuse as-is.
+std::shared_ptr<Tensor> create_empty_tensor(const luci::CircleNode *node)
+{
+  auto tensor = std::make_shared<Tensor>();
+  {
+    tensor->dtype(node->dtype());
+    tensor->rank(node->rank());
+    for (uint32_t i = 0; i < node->rank(); i++)
+      tensor->dim(i) = node->dim(i);
+    tensor->size<loco::DataType::FLOAT32>(numElements(node));
+  }
+
+  return tensor;
+}
+
+// Build a FLOAT32 tensor shaped like the module's single output, with every
+// element set to `value` (asserts the module has exactly one output).
+std::shared_ptr<Tensor> output_tensor_with_value(const luci::Module *module, float value)
+{
+  auto outputs = loco::output_nodes(module->graph());
+  assert(outputs.size() == 1);
+  auto output = *outputs.begin();
+  auto output_cnode = loco::must_cast<luci::CircleNode *>(output);
+  auto tensor = create_empty_tensor(output_cnode);
+  auto tensor_size = tensor->size<loco::DataType::FLOAT32>();
+  for (uint32_t i = 0; i < tensor_size; i++)
+  {
+    tensor->at<loco::DataType::FLOAT32>(i) = value;
+  }
+  return tensor;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+// MAE of constant outputs 1.0 vs 2.0 must be 1 for the single output
+TEST(CircleEvalMetricPrinterTest, MAE_simple)
+{
+  luci::Module first;
+  AddOneGraph first_g;
+  first_g.init();
+
+  // NOTE(review): graph() already returns by value, so std::move is redundant
+  first.add(std::move(first_g.graph()));
+
+  luci::Module second;
+  AddTwoGraph second_g;
+  second_g.init();
+
+  second.add(std::move(second_g.graph()));
+
+  MAEPrinter mae;
+
+  mae.init(&first, &second);
+
+  // This test does not actually evaluate the modules, but create
+  // fake results.
+  std::vector<std::shared_ptr<Tensor>> first_result;
+  {
+    auto output = output_tensor_with_value(&first, 1.0);
+    first_result.emplace_back(output);
+  }
+
+  std::vector<std::shared_ptr<Tensor>> second_result;
+  {
+    auto output = output_tensor_with_value(&second, 2.0);
+    second_result.emplace_back(output);
+  }
+
+  mae.accumulate(first_result, second_result);
+
+  std::stringstream ss;
+  mae.dump(ss);
+  std::string result = ss.str();
+
+  EXPECT_NE(std::string::npos, result.find("MAE for output_0 is 1"));
+}
+
+// init() must reject null modules
+TEST(CircleEvalMetricPrinterTest, MAE_init_with_null_NEG)
+{
+  MAEPrinter mae;
+
+  EXPECT_ANY_THROW(mae.init(nullptr, nullptr));
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp b/compiler/circle-eval-diff/src/ModuleEvalDiff.cpp
new file mode 100644 (file)
index 0000000..85f9858
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleEvalDiff.h"
+#include "Tensor.h"
+
+#include <luci_interpreter/Interpreter.h>
+#include <dio_hdf5/HDF5Importer.h>
+
+#include <string>
+#include <stdexcept>
+#include <iostream>
+#include <cassert>
+
+using Tensor = circle_eval_diff::Tensor;
+using DataType = loco::DataType;
+using Shape = std::vector<loco::Dimension>;
+using HDF5Importer = dio::hdf5::HDF5Importer;
+
+namespace
+{
+
+// Check the type and the shape of CircleInput
+// Throws std::runtime_error when dtype, rank, or any dimension mismatches.
+void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype, const Shape &shape)
+{
+  // Type check
+  if (dtype != input_node->dtype())
+    throw std::runtime_error("Wrong input type.");
+
+  if (shape.size() != input_node->rank())
+    throw std::runtime_error("Input rank mismatch.");
+
+  for (uint32_t i = 0; i < shape.size(); i++)
+  {
+    // loco::Dimension only provides operator==, hence `not(... == ...)`
+    if (not(shape.at(i) == input_node->dim(i)))
+      throw std::runtime_error("Input shape mismatch.");
+  }
+}
+
+// Return number of elements of the node (product of all dimensions).
+uint32_t numElements(const luci::CircleNode *node)
+{
+  uint32_t count = 1;
+  const auto rank = node->rank();
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    count *= node->dim(axis).value();
+  return count;
+}
+
+// Return Tensor which has the same dtype and shape with node.
+// Buffer does not have any data yet.
+// Supported dtypes: FLOAT32, U8, S16, S32, S64; others throw.
+std::shared_ptr<Tensor> createEmptyTensor(const luci::CircleNode *node)
+{
+  auto tensor = std::make_shared<Tensor>();
+  {
+    tensor->dtype(node->dtype());
+    tensor->rank(node->rank());
+    for (uint32_t i = 0; i < node->rank(); i++)
+      tensor->dim(i) = node->dim(i);
+
+    // size<DT>() is templated on dtype, so dispatch at runtime
+    switch (node->dtype())
+    {
+      case loco::DataType::FLOAT32:
+        tensor->size<loco::DataType::FLOAT32>(numElements(node));
+        break;
+      case loco::DataType::U8:
+        tensor->size<loco::DataType::U8>(numElements(node));
+        break;
+      case loco::DataType::S16:
+        tensor->size<loco::DataType::S16>(numElements(node));
+        break;
+      case loco::DataType::S32:
+        tensor->size<loco::DataType::S32>(numElements(node));
+        break;
+      case loco::DataType::S64:
+        tensor->size<loco::DataType::S64>(numElements(node));
+        break;
+      default:
+        throw std::runtime_error("Unsupported input tensor dtype for " + node->name());
+    }
+  }
+
+  return tensor;
+}
+
+} // namespace
+
+namespace circle_eval_diff
+{
+
+void H5InputEvalDiff::evalDiff(const std::string &first_input_data_path,
+                               const std::string &second_input_data_path) const
+{
+  const auto interp = std::make_unique<luci_interpreter::Interpreter>(_first_module.get());
+
+  _metric->init(_first_module.get(), _second_module.get());
+
+  try
+  {
+    HDF5Importer first_h5(first_input_data_path);
+    first_h5.importGroup("value");
+
+    HDF5Importer second_h5(second_input_data_path);
+    second_h5.importGroup("value");
+
+    const auto first_num_data = first_h5.numData();
+    const auto second_num_data = second_h5.numData();
+
+    if (first_num_data != second_num_data)
+      throw std::runtime_error(
+        "Number of data in the first data file and the second data file mismatches.");
+
+    if (first_num_data == 0)
+      throw std::runtime_error("Input data file does not contain any record.");
+
+    const auto first_input_nodes = loco::input_nodes(_first_module->graph());
+    const auto first_num_inputs = first_input_nodes.size();
+    const auto first_output_nodes = loco::output_nodes(_first_module->graph());
+    const auto first_num_outputs = first_output_nodes.size();
+
+    const auto second_input_nodes = loco::input_nodes(_second_module->graph());
+    const auto second_num_inputs = second_input_nodes.size();
+    const auto second_output_nodes = loco::output_nodes(_second_module->graph());
+    const auto second_num_outputs = second_output_nodes.size();
+
+    for (int32_t data_idx = 0; data_idx < first_num_data; data_idx++)
+    {
+      std::cout << "Evaluating " << data_idx << "'th data" << std::endl;
+
+      if (first_num_inputs != first_h5.numInputs(data_idx) ||
+          second_num_inputs != second_h5.numInputs(data_idx))
+        throw std::runtime_error("Wrong number of inputs in " + std::to_string(data_idx) +
+                                 "th data.");
+
+      // Do inference and return output
+      auto eval = [&](HDF5Importer &h5, uint32_t num_inputs,
+                      const std::vector<loco::Node *> &input_nodes, uint32_t num_outputs,
+                      const std::vector<loco::Node *> &output_nodes) {
+        // Write input data
+        for (uint32_t input_idx = 0; input_idx < num_inputs; input_idx++)
+        {
+          const auto *input_node =
+            loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
+          assert(input_node->index() == input_idx);
+
+          auto tensor = createEmptyTensor(input_node);
+          if (h5.isRawData())
+          {
+            h5.readTensor(data_idx, input_idx, tensor->buffer());
+          }
+          else
+          {
+            DataType dtype;
+            Shape shape;
+            h5.readTensor(data_idx, input_idx, &dtype, &shape, tensor->buffer());
+
+            // Check the type and the shape of the input data is valid
+            verifyTypeShape(input_node, dtype, shape);
+          }
+
+          interp->writeInputTensor(input_node, tensor->buffer(), tensor->byte_size());
+        }
+
+        // Interpret
+        interp->interpret();
+
+        // Read output data
+        std::vector<std::shared_ptr<Tensor>> outputs;
+        for (uint32_t output_idx = 0; output_idx < num_outputs; output_idx++)
+        {
+          const auto *output_node =
+            loco::must_cast<const luci::CircleOutput *>(output_nodes[output_idx]);
+          assert(output_node->index() == output_idx);
+
+          auto tensor = createEmptyTensor(output_node);
+          interp->readOutputTensor(output_node, tensor->buffer(), tensor->byte_size());
+          outputs.emplace_back(tensor);
+        }
+
+        return outputs;
+      };
+
+      auto first_output =
+        eval(first_h5, first_num_inputs, first_input_nodes, first_num_outputs, first_output_nodes);
+      auto second_output = eval(second_h5, second_num_inputs, second_input_nodes,
+                                second_num_outputs, second_output_nodes);
+
+      // Accumulate diffs
+      _metric->accumulate(first_output, second_output);
+    }
+
+    std::cout << "Evaluation finished. Number of data: " << first_num_data << std::endl;
+  }
+  catch (const H5::Exception &e)
+  {
+    H5::Exception::printErrorStack();
+    throw std::runtime_error("HDF5 error occurred.");
+  }
+
+  // Print metric
+  std::cout << _metric.get() << std::endl;
+}
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/ModuleEvalDiff.h b/compiler/circle-eval-diff/src/ModuleEvalDiff.h
new file mode 100644 (file)
index 0000000..c7642f6
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
+#define __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
+
+#include "MetricPrinter.h"
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+
+namespace circle_eval_diff
+{
+
+// Base class that owns the two modules and the metric; derived classes feed
+// input data through both modules and accumulate the metric.
+class ModuleEvalDiff
+{
+public:
+  // Takes ownership of both modules and the metric printer
+  ModuleEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
+                 std::unique_ptr<MetricPrinter> &&metric)
+    : _first_module(std::move(first)), _second_module(std::move(second)), _metric(std::move(metric))
+  {
+  }
+
+  virtual ~ModuleEvalDiff() = default;
+
+  // Implement this in the child class
+  virtual void evalDiff(const std::string &first_input_data_path,
+                        const std::string &second_input_data_path) const = 0;
+
+protected:
+  std::unique_ptr<luci::Module> _first_module;
+  std::unique_ptr<luci::Module> _second_module;
+  std::unique_ptr<MetricPrinter> _metric;
+};
+
+// Evaluates the diff metric using input records read from two HDF5 files
+class H5InputEvalDiff final : public ModuleEvalDiff
+{
+public:
+  H5InputEvalDiff(std::unique_ptr<luci::Module> &&first, std::unique_ptr<luci::Module> &&second,
+                  std::unique_ptr<MetricPrinter> &&metric)
+    : ModuleEvalDiff(std::move(first), std::move(second), std::move(metric))
+  {
+  }
+
+  // Mark the virtual override explicitly so signature drift fails to compile
+  void evalDiff(const std::string &first_input_data_path,
+                const std::string &second_input_data_path) const override;
+};
+
+// TODO Implement ModuleEvalDiff for random input and directory input
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_MODULE_EVAL_DIFF_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.cpp b/compiler/circle-eval-diff/src/Tensor.cpp
new file mode 100644 (file)
index 0000000..6710e8c
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <cassert>
+
+namespace circle_eval_diff
+{
+
+// Throw std::runtime_error with MSG unless COND holds
+#define THROW_UNLESS(COND, MSG) \
+  if (not(COND))                \
+    throw std::runtime_error(MSG);
+
+// Number of elements of type DT currently held in the buffer
+template <loco::DataType DT> uint32_t Tensor::size(void) const
+{
+  assert(dtype() == DT);
+  assert(_data.size() % sizeof(typename loco::DataTypeImpl<DT>::Type) == 0);
+  return _data.size() / sizeof(typename loco::DataTypeImpl<DT>::Type);
+}
+
+// Resize the buffer to hold `l` elements of type DT
+template <loco::DataType DT> void Tensor::size(uint32_t l)
+{
+  assert(dtype() == DT);
+  _data.resize(l * sizeof(typename loco::DataTypeImpl<DT>::Type));
+}
+
+// Read-only element access; throws on out-of-range index
+template <loco::DataType DT>
+const typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n) const
+{
+  assert(dtype() == DT);
+  THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+  return *(reinterpret_cast<const typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+// Mutable element access; throws on out-of-range index
+template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &Tensor::at(uint32_t n)
+{
+  assert(dtype() == DT);
+  THROW_UNLESS(n < size<DT>(), "Access to out of buffer boundary.");
+  return *(reinterpret_cast<typename loco::DataTypeImpl<DT>::Type *>(_data.data()) + n);
+}
+
+#undef THROW_UNLESS
+
+// Explicit instantiations for every dtype circle-eval-diff supports
+#define INSTANTIATE(DT)                                                                 \
+  template uint32_t Tensor::size<DT>(void) const;                                       \
+  template void Tensor::size<DT>(uint32_t);                                             \
+  template const typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t) const; \
+  template typename loco::DataTypeImpl<DT>::Type &Tensor::at<DT>(uint32_t);
+
+INSTANTIATE(loco::DataType::S64);
+INSTANTIATE(loco::DataType::S32);
+INSTANTIATE(loco::DataType::S16);
+INSTANTIATE(loco::DataType::U8);
+INSTANTIATE(loco::DataType::FLOAT32);
+
+#undef INSTANTIATE
+
+} // namespace circle_eval_diff
diff --git a/compiler/circle-eval-diff/src/Tensor.h b/compiler/circle-eval-diff/src/Tensor.h
new file mode 100644 (file)
index 0000000..65ab606
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EVAL_DIFF_TENSOR_H__
+#define __CIRCLE_EVAL_DIFF_TENSOR_H__
+
+#include <loco.h>
+
+#include <vector>
+
+namespace circle_eval_diff
+{
+
+// Mixin holding the element data type (defaults to Unknown)
+struct TensorDataType
+{
+public:
+  const loco::DataType &dtype(void) const { return _dtype; }
+  void dtype(const loco::DataType &dtype) { _dtype = dtype; }
+
+private:
+  loco::DataType _dtype = loco::DataType::Unknown;
+};
+
+// Mixin holding the tensor shape as a list of loco::Dimension
+struct TensorShape
+{
+public:
+  uint32_t rank(void) const { return _dims.size(); }
+  // NOTE resizing the rank default-constructs any new dimensions
+  void rank(uint32_t value) { _dims.resize(value); }
+
+  // at() bounds-checks, so out-of-range axes throw
+  const loco::Dimension &dim(uint32_t axis) const { return _dims.at(axis); }
+  loco::Dimension &dim(uint32_t axis) { return _dims.at(axis); }
+
+  // Convenience setter: t.shape({1, 2, 3}) sets rank and all dimensions
+  void shape(std::initializer_list<uint32_t> dims)
+  {
+    rank(dims.size());
+
+    uint32_t axis = 0;
+    for (auto d : dims)
+    {
+      dim(axis++) = d;
+    }
+  }
+
+private:
+  std::vector<loco::Dimension> _dims;
+};
+
+// Tensor has three kinds of data
+// 1. DataType (_dtype)
+// 2. Shape (_dims)
+// 3. Buffer (_data)
+// The buffer is a raw byte vector; the templated size/at accessors in
+// Tensor.cpp reinterpret it according to the dtype.
+struct Tensor final : public TensorShape, public TensorDataType
+{
+public:
+  template <loco::DataType DT> uint32_t size(void) const;
+  template <loco::DataType DT> void size(uint32_t size);
+  template <loco::DataType DT> const typename loco::DataTypeImpl<DT>::Type &at(uint32_t n) const;
+  template <loco::DataType DT> typename loco::DataTypeImpl<DT>::Type &at(uint32_t n);
+  uint8_t *buffer(void) { return _data.data(); }
+  uint32_t byte_size(void) const { return _data.size(); }
+
+private:
+  std::vector<uint8_t> _data;
+};
+
+} // namespace circle_eval_diff
+
+#endif // __CIRCLE_EVAL_DIFF_TENSOR_H__
diff --git a/compiler/circle-eval-diff/src/Tensor.test.cpp b/compiler/circle-eval-diff/src/Tensor.test.cpp
new file mode 100644 (file)
index 0000000..3bdeaec
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Tensor.h"
+
+#include <gtest/gtest.h>
+
+using Tensor = circle_eval_diff::Tensor;
+
+namespace
+{
+
+// Accessing index == size must throw for every supported dtype
+template <loco::DataType DT> void test_out_of_buffer_range()
+{
+  Tensor t;
+
+  t.shape({1, 2, 3});
+  t.dtype(DT);
+  t.size<DT>(6);
+
+  // Valid indices are 0..5; 6 is one past the end
+  EXPECT_ANY_THROW(t.at<DT>(6));
+}
+
+// Round-trip check of shape, dtype, and buffer accessors for one dtype
+template <loco::DataType DT> void test_getter_setter()
+{
+  Tensor t;
+
+  // Check shape
+  t.shape({1, 2, 3});
+  EXPECT_EQ(3, t.rank());
+  EXPECT_EQ(1, t.dim(0));
+  EXPECT_EQ(2, t.dim(1));
+  EXPECT_EQ(3, t.dim(2));
+
+  // Check dtype
+  t.dtype(DT);
+  EXPECT_EQ(DT, t.dtype());
+
+  // Check buffer
+  t.size<DT>(6);
+  EXPECT_EQ(6 * sizeof(typename loco::DataTypeImpl<DT>::Type), t.byte_size());
+  for (uint32_t i = 0; i < 6; i++)
+    t.at<DT>(i) = i;
+
+  for (uint32_t i = 0; i < 6; i++)
+    EXPECT_EQ(i, t.at<DT>(i));
+}
+
+} // namespace
+
+// A default-constructed Tensor is empty with Unknown dtype
+TEST(CircleEvalDiffTensorTest, constructor)
+{
+  Tensor t;
+
+  EXPECT_EQ(0, t.byte_size());
+  EXPECT_EQ(0, t.rank());
+  EXPECT_EQ(loco::DataType::Unknown, t.dtype());
+}
+
+// Getter/setter round trip for every supported dtype
+TEST(CircleEvalDiffTensorTest, getter_setter)
+{
+  test_getter_setter<loco::DataType::S64>();
+  test_getter_setter<loco::DataType::S32>();
+  test_getter_setter<loco::DataType::S16>();
+  test_getter_setter<loco::DataType::U8>();
+  test_getter_setter<loco::DataType::FLOAT32>();
+
+  SUCCEED();
+}
+
+// dim() must throw for an axis beyond the rank
+TEST(CircleEvalDiffTensorTest, out_of_shape_range_NEG)
+{
+  Tensor t;
+  t.shape({1, 2, 2, 3});
+
+  EXPECT_ANY_THROW(t.dim(4));
+}
+
+// at() must throw for an index beyond the buffer, for every dtype
+TEST(CircleEvalDiffTensorTest, out_of_buffer_range_NEG)
+{
+  test_out_of_buffer_range<loco::DataType::S64>();
+  test_out_of_buffer_range<loco::DataType::S32>();
+  test_out_of_buffer_range<loco::DataType::S16>();
+  test_out_of_buffer_range<loco::DataType::U8>();
+  test_out_of_buffer_range<loco::DataType::FLOAT32>();
+
+  SUCCEED();
+}
index 115d2486015ae8a8ba766360ea6dbf265e56410b..2f657c171955df920e34427bbeaefc3b2c37c746 100644 (file)
@@ -1,4 +1,9 @@
 set(SOURCES
+        pal/IScratchpadHelper.h
+        pal/ScratchpadHelperLinux.h
+        pal/ScratchpadHelperMCU.h
+        pal/ScratchpadHelperCMSISNN.h
+        pal/TargetPlatform.h
         src/CircleExecutionPlan.cpp
         src/ExecutionPlanner.cpp
         src/ExecutionPlanner.h
@@ -13,4 +18,5 @@ target_link_libraries(circle_execution_plan luci_export)
 target_link_libraries(circle_execution_plan luci_plan)
 target_link_libraries(circle_execution_plan arser)
 
+target_include_directories(circle_execution_plan PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/pal")
 install(TARGETS circle_execution_plan DESTINATION bin)
index e789a55db44417eb89df963b830f571ec4fbb858..dbb7d4f851d6d85c0d942790370f6f6dc400c083 100644 (file)
@@ -10,13 +10,12 @@ The output circle file contains plan (`CircleNodeMemoryPlan`) information for ev
 - number which determines order in which nodes will be executed
 - memory offsets for node output tensors from the beginning of shared memory buffer
 
-In order to record and read this metadata, we use `CircleImportMetadata` and `CircleExportMetadata`.
-For this purpose we use `std::map<uint32_t, std::vector<uint32_t>> _memory_plan_table` which for each node with key ID contains encoded `CircleNodeMemoryPlan` data.
+In order to record and read this data, we use `luci::CircleNodeExecutionPlan`.
 
 ### Execution plan building
 
 In order to build "execution plan" we use `ExecutionPlanner` class.
-The main method is `get_execution_plan()` which for each node finds and writes to its annotations 
+The main method is `make_execution_plan()` which for each node finds and writes to its annotations 
 "execution plan". For this purpose there are two steps:
 - determining the order of execution of nodes, which is stored in `_ordered_nodes` vector.
 Now for this purpose there is only one default method `get_default_execution_order_plan()` that uses `loco::postorder_traversal(const std::vector<loco::Node *> &roots)`.
diff --git a/compiler/circle-execution-plan/pal/IScratchpadHelper.h b/compiler/circle-execution-plan/pal/IScratchpadHelper.h
new file mode 100644 (file)
index 0000000..f5a9915
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H
+#define CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H
+
+#include <luci/IR/Nodes/CircleAveragePool2D.h>
+#include <luci/IR/Nodes/CircleBatchMatMul.h>
+#include <luci/IR/Nodes/CircleConv2D.h>
+#include <luci/IR/Nodes/CircleDepthwiseConv2D.h>
+#include <luci/IR/Nodes/CircleSVDF.h>
+#include <cstdint>
+
+namespace circle_planner
+{
+
+class IScratchpadHelper
+{
+public:
+  virtual uint32_t
+  ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) = 0;
+
+  virtual std::vector<uint32_t>
+  ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) = 0;
+
+  virtual uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) = 0;
+
+  virtual uint32_t
+  ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) = 0;
+
+  virtual std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) = 0;
+
+  virtual ~IScratchpadHelper() = default;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_ISRCRATCHPAD_HELPER_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h b/compiler/circle-execution-plan/pal/ScratchpadHelperCMSISNN.h
new file mode 100644 (file)
index 0000000..5369c09
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
+
+#include "IScratchpadHelper.h"
+#include <cassert>
+
+namespace circle_planner
+{
+
+namespace
+{
+
+// One-side padding that centers the (dilated) filter over the input:
+// max(0, ((out_size - 1) * stride + effective_filter - in_size) / 2)
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                              int32_t filter_size, int32_t out_size)
+{
+  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+  return padding > 0 ? padding : 0;
+}
+
+} // namespace
+
+class ScratchpadHelperCMSISNN : public IScratchpadHelper
+{
+public:
+  explicit ScratchpadHelperCMSISNN(bool use_dsp) : _use_dsp(use_dsp)
+  {
+    // Do nothing
+  }
+
+  uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+  {
+    // Main logic of arm_avgpool_s8_get_buffer_size
+
+    const auto avg_pool_input = loco::must_cast<luci::CircleNode *>(avg_pool->value());
+
+    if (avg_pool_input->dtype() != loco::DataType::S8 or !_use_dsp)
+      return 0;
+
+    const auto depth = static_cast<int32_t>(avg_pool_input->dim(3).value());
+
+    return depth * sizeof(int32_t);
+  }
+
+  std::vector<uint32_t>
+  ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+  {
+    throw std::runtime_error("BatchMatMul is not currently supported for cmsisnn platform");
+  }
+
+  uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+  {
+    // Main logic of arm_convolve_wrapper_s8_get_buffer_size
+
+    const auto dilation_height_factor = static_cast<int32_t>(conv->dilation()->h());
+    const auto dilation_width_factor = static_cast<int32_t>(conv->dilation()->w());
+
+    const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+    const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+    if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+        conv_input->dtype() != loco::DataType::S8)
+    {
+      return 0;
+    }
+
+    const auto input_depth = static_cast<int32_t>(conv_input->dim(3).value());
+
+    const auto input_height = static_cast<int32_t>(conv_input->dim(1).value());
+    const auto input_width = static_cast<int32_t>(conv_input->dim(2).value());
+
+    const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+    const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+    const auto stride_height = static_cast<int32_t>(conv->stride()->h());
+    const auto stride_width = static_cast<int32_t>(conv->stride()->w());
+
+    const auto output_height = static_cast<int32_t>(conv->dim(1).value());
+    const auto output_width = static_cast<int32_t>(conv->dim(2).value());
+
+    assert(conv_input->quantparam()->zerop.size() == 1);
+    assert(conv->quantparam()->zerop.size() == 1);
+
+    const auto padding_height = computePadding(stride_height, dilation_height_factor, input_height,
+                                               filter_height, output_height);
+    const auto padding_width =
+      computePadding(stride_width, dilation_width_factor, input_width, filter_width, output_width);
+
+    if ((padding_width == 0) && (padding_height == 0) && (input_depth % 4 == 0) &&
+        (stride_width == 1) && (stride_height == 1) && (filter_width == 1) && (filter_height == 1))
+    {
+      return 0;
+    }
+
+    if (_use_dsp)
+    {
+      return (2 * input_depth * filter_width * filter_height) * sizeof(int16_t);
+    }
+
+    return 0;
+  }
+
+  uint32_t
+  ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+  {
+    // Main logic of arm_depthwise_conv_wrapper_s8_get_buffer_size
+
+    const auto dilation_height_factor = static_cast<int32_t>(depthwise_conv->dilation()->h());
+    const auto dilation_width_factor = static_cast<int32_t>(depthwise_conv->dilation()->w());
+
+    const auto depthwise_conv_input = loco::must_cast<luci::CircleNode *>(depthwise_conv->input());
+    const auto filter = loco::must_cast<luci::CircleNode *>(depthwise_conv->filter());
+
+    if (dilation_width_factor != 1 or dilation_height_factor != 1 or
+        depthwise_conv_input->dtype() != loco::DataType::S8)
+    {
+      return 0;
+    }
+
+    const auto input_depth = static_cast<int32_t>(depthwise_conv_input->dim(3).value());
+    const auto output_depth = static_cast<int32_t>(depthwise_conv->dim(3).value());
+    const auto batch_size = static_cast<int32_t>(depthwise_conv_input->dim(0).value());
+
+    if (input_depth != output_depth or batch_size != 1 or !_use_dsp)
+      return 0;
+
+    const auto filter_height = static_cast<int32_t>(filter->dim(1).value());
+    const auto filter_width = static_cast<int32_t>(filter->dim(2).value());
+
+    return input_depth * filter_height * filter_width * sizeof(int16_t);
+  }
+
+  std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+  {
+    const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+    const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+    if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+        (weight_feature_input->dtype() == loco::DataType::S8 or
+         weight_feature_input->dtype() == loco::DataType::U8))
+    {
+      throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+    }
+
+    std::vector<uint32_t> scratchpad_sizes;
+
+    const auto batch_size = svdf_input->dim(0).value();
+    const auto num_filters = weight_feature_input->dim(0).value();
+    const auto rank = svdf->svdf_rank();
+    const auto num_units = num_filters / rank;
+
+    if (svdf_input->dtype() == loco::DataType::S8)
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+      scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+    }
+    else
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+    }
+
+    return scratchpad_sizes;
+  }
+
+private:
+  bool _use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_CMSISNN_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h b/compiler/circle-execution-plan/pal/ScratchpadHelperLinux.h
new file mode 100644 (file)
index 0000000..811aa67
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
+
+#include "IScratchpadHelper.h"
+#include <loco/IR/DataTypeTraits.h>
+
+namespace circle_planner
+{
+
+class ScratchpadHelperLinux : public IScratchpadHelper
+{
+public:
+  uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+  {
+    // for linux AveragePool2d scratchpad tensors size = 0
+    return 0;
+  }
+
+  std::vector<uint32_t>
+  ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+  {
+    const auto lhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->x());
+    const auto rhs = loco::must_cast<luci::CircleNode *>(batch_mat_mul->y());
+
+    std::vector<uint32_t> scratchpad_sizes;
+
+    // Scratchpad for lhs
+    uint32_t scratchpad_size = 1;
+    for (int32_t i = 0; i < lhs->rank(); ++i)
+      scratchpad_size *= lhs->dim(i).value();
+
+    scratchpad_sizes.push_back(scratchpad_size * loco::size(lhs->dtype()));
+
+    // Scratchpad for rhs
+    scratchpad_size = 1;
+    for (int32_t i = 0; i < rhs->rank(); ++i)
+      scratchpad_size *= rhs->dim(i).value();
+
+    scratchpad_sizes.push_back(scratchpad_size * loco::size(rhs->dtype()));
+
+    return scratchpad_sizes;
+  }
+
+  uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *conv) final
+  {
+    const auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
+    const auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
+
+    const uint32_t stride_height = conv->stride()->h();
+    const uint32_t stride_width = conv->stride()->w();
+
+    const uint32_t dilation_height_factor = conv->dilation()->h();
+    const uint32_t dilation_width_factor = conv->dilation()->w();
+
+    const uint32_t filter_height = filter->dim(1).value();
+    const uint32_t filter_width = filter->dim(2).value();
+
+    const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
+    const bool need_non_dilated_im2col =
+      stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
+    const bool need_im2col = conv_input->dtype() != loco::DataType::S16 &&
+                             (need_dilated_im2col || need_non_dilated_im2col);
+
+    if (!need_im2col)
+    {
+      return 0;
+    }
+
+    const uint32_t input_depth = conv_input->dim(3).value();
+    const uint32_t batches = conv_input->dim(0).value();
+
+    const uint32_t output_height = conv->dim(1).value();
+    const uint32_t output_width = conv->dim(2).value();
+
+    return batches * output_height * output_width * input_depth * filter_height * filter_width *
+           size(conv_input->dtype());
+  }
+
+  uint32_t
+  ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+  {
+    // for linux DepthwiseConv2d scratchpad tensors size = 0
+    return 0;
+  }
+
+  std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+  {
+    const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+    const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+    if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+        (weight_feature_input->dtype() == loco::DataType::S8 or
+         weight_feature_input->dtype() == loco::DataType::U8))
+    {
+      throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+    }
+
+    std::vector<uint32_t> scratchpad_sizes;
+
+    const auto batch_size = svdf_input->dim(0).value();
+    const auto num_filters = weight_feature_input->dim(0).value();
+    const auto rank = svdf->svdf_rank();
+    const auto num_units = num_filters / rank;
+
+    if (svdf_input->dtype() == loco::DataType::S8)
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+      scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+    }
+    else
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+    }
+
+    return scratchpad_sizes;
+  }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_LINUX_H
diff --git a/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h b/compiler/circle-execution-plan/pal/ScratchpadHelperMCU.h
new file mode 100644 (file)
index 0000000..14b4164
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+#define CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
+
+#include "IScratchpadHelper.h"
+
+namespace circle_planner
+{
+
+class ScratchpadHelperMCU : public IScratchpadHelper
+{
+public:
+  uint32_t ComputeScratchpadSizeAveragePool2d(const luci::CircleAveragePool2D *avg_pool) final
+  {
+    // for mcu AveragePool2d scratchpad tensors size = 0
+    return 0;
+  }
+
+  std::vector<uint32_t>
+  ComputeScratchpadSizeBatchMatMul(const luci::CircleBatchMatMul *batch_mat_mul) final
+  {
+    throw std::runtime_error("BatchMatMul is not currently supported for mcu platform");
+  }
+
+  uint32_t ComputeScratchpadSizeConv2d(const luci::CircleConv2D *) final
+  {
+    // for mcu scratchpad size = 0
+    return 0;
+  }
+
+  uint32_t
+  ComputeScratchpadSizeDepthwiseConv2d(const luci::CircleDepthwiseConv2D *depthwise_conv) final
+  {
+    // for mcu DepthwiseConv2d scratchpad tensors size = 0
+    return 0;
+  }
+
+  std::vector<uint32_t> ComputeScratchpadSizeSVDF(const luci::CircleSVDF *svdf) final
+  {
+    const auto svdf_input = loco::must_cast<luci::CircleNode *>(svdf->input());
+    const auto weight_feature_input = loco::must_cast<luci::CircleNode *>(svdf->weight_feature());
+
+    if (svdf_input->dtype() == loco::DataType::FLOAT32 and
+        (weight_feature_input->dtype() == loco::DataType::S8 or
+         weight_feature_input->dtype() == loco::DataType::U8))
+    {
+      throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+    }
+
+    std::vector<uint32_t> scratchpad_sizes;
+
+    const auto batch_size = svdf_input->dim(0).value();
+    const auto num_filters = weight_feature_input->dim(0).value();
+    const auto rank = svdf->svdf_rank();
+    const auto num_units = num_filters / rank;
+
+    if (svdf_input->dtype() == loco::DataType::S8)
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(int32_t));
+      scratchpad_sizes.push_back(batch_size * num_units * sizeof(int32_t));
+    }
+    else
+    {
+      scratchpad_sizes.push_back(batch_size * num_filters * sizeof(float));
+    }
+
+    return scratchpad_sizes;
+  }
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_SCRATCHPAD_HELPER_MCU_H
diff --git a/compiler/circle-execution-plan/pal/TargetPlatform.h b/compiler/circle-execution-plan/pal/TargetPlatform.h
new file mode 100644 (file)
index 0000000..538a502
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+#define CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
+
+namespace circle_planner
+{
+
+enum SupportedPlatformType
+{
+  LINUX,
+  MCU,
+  CMSISNN
+};
+
+struct TargetPlatform
+{
+  SupportedPlatformType platform_type;
+  bool use_dsp;
+};
+
+} // namespace circle_planner
+
+#endif // CIRCLE_EXECUTION_PLAN_TARGET_PLATFORM_H
index a54100b8c59da1dbad225ae641e7f5337ddce878..1788124c3a14e00de0cc8c5df94d3660b123c510 100644 (file)
@@ -35,6 +35,18 @@ int entry(int argc, char **argv)
 
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+  arser.add_argument("--platform")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .default_value("linux")
+    .help("Platform name: linux mcu cmsisnn");
+  arser.add_argument("--use_dsp")
+    .nargs(1)
+    .type(arser::DataType::BOOL)
+    .required(false)
+    .default_value(false)
+    .help("Plan with or without dsp (now can be used only with cmsisnn)");
 
   try
   {
@@ -47,8 +59,35 @@ int entry(int argc, char **argv)
     return 255;
   }
 
-  std::string input_path = arser.get<std::string>("input");
-  std::string output_path = arser.get<std::string>("output");
+  const std::string input_path = arser.get<std::string>("input");
+  const std::string output_path = arser.get<std::string>("output");
+  const std::string platform_name = arser.get<std::string>("--platform");
+  const bool use_dsp = arser.get<bool>("--use_dsp");
+
+  if (platform_name != "cmsisnn" && use_dsp)
+  {
+    std::cerr << "ERROR: Now use_dsp can be used only with cmsisnn" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  circle_planner::SupportedPlatformType platform_type;
+  if (platform_name == "linux")
+  {
+    platform_type = circle_planner::SupportedPlatformType::LINUX;
+  }
+  else if (platform_name == "mcu")
+  {
+    platform_type = circle_planner::SupportedPlatformType::MCU;
+  }
+  else if (platform_name == "cmsisnn")
+  {
+    platform_type = circle_planner::SupportedPlatformType::CMSISNN;
+  }
+  else
+  {
+    std::cerr << "ERROR: Invalid platform name '" << platform_name << "'" << std::endl;
+    return EXIT_FAILURE;
+  }
 
   foder::FileLoader file_loader{input_path};
   std::vector<char> model_data;
@@ -82,8 +121,8 @@ int entry(int argc, char **argv)
   auto module = importer.importModule(circle_model);
 
   // Do main job
-  luci::ExecutionPlanner execution_planner(module->graph());
-  execution_planner.get_execution_plan();
+  circle_planner::ExecutionPlanner execution_planner(module->graph(), {platform_type, use_dsp});
+  execution_planner.make_execution_plan();
 
   // Export to output Circle file
   luci::CircleExporter exporter;
index c37d1e5f5dcbd56498de477852220747be754c09..ec2ec1362d61a9c74361e4c640a9baefccb8c8ac 100644 (file)
 #include <loco/IR/Algorithm.h>
 #include <luci/UserSettings.h>
 
-namespace luci
+namespace circle_planner
 {
 namespace
 {
 
-constexpr uint32_t nodeNotAssigned = std::numeric_limits<int32_t>::max();
+constexpr uint32_t node_not_assigned = std::numeric_limits<int32_t>::max();
 
-uint32_t compute_output_size(Padding padding, uint32_t image_size, uint32_t filter_size,
-                             uint32_t stride, uint32_t dilation_rate = 1)
+bool isExecutableNode(const luci::CircleNode *node)
 {
-  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
-  switch (padding)
+  switch (node->opcode())
   {
-    case Padding::SAME:
-      return (image_size + stride - 1) / stride;
-    case Padding::VALID:
-      return (image_size + stride - effective_filter_size) / stride;
+    // The following nodes denote outputs of multiple-output nodes.
+    // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+    case luci::CircleOpcode::CIRCLEIFOUT:
+    case luci::CircleOpcode::CIRCLESPLITOUT:
+    case luci::CircleOpcode::CIRCLESPLITVOUT:
+    case luci::CircleOpcode::CIRCLEUNPACKOUT:
+    case luci::CircleOpcode::CIRCLEWHILEOUT:
+      return false;
     default:
-      assert(false);
+      return true;
   }
 }
 
-// Method finds (if necessary) size for im2col temporary tensor.
-uint32_t compute_im2col_size(const luci::CircleConv2D *conv)
+bool isTensorProducingNode(const luci::CircleNode *node)
 {
-  auto conv_input = loco::must_cast<luci::CircleNode *>(conv->input());
-  auto filter = loco::must_cast<luci::CircleNode *>(conv->filter());
-  auto padding = (conv->padding());
-  uint32_t stride_height = conv->stride()->h();
-  uint32_t stride_width = conv->stride()->w();
-
-  uint32_t dilation_height_factor = conv->dilation()->h();
-  uint32_t dilation_width_factor = conv->dilation()->w();
-
-  uint32_t filter_height = filter->dim(1).value();
-  uint32_t filter_width = filter->dim(2).value();
-
-  const bool need_dilated_im2col = dilation_height_factor != 1 || dilation_width_factor != 1;
-  const bool need_non_dilated_im2col =
-    stride_height != 1 || stride_width != 1 || filter_height != 1 || filter_width != 1;
-  bool need_im2col =
-    conv_input->dtype() != loco::DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
-
-  if (!need_im2col)
+  switch (node->opcode())
   {
-    return 0;
+    // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
+    // are produced by the corresponding *Out nodes instead.
+    // The list is synchronized with the same list from luci-interpreter/src/loader/GraphLoader.cpp
+    case luci::CircleOpcode::IF:
+    case luci::CircleOpcode::SPLIT:
+    case luci::CircleOpcode::UNPACK:
+      return false;
+    default:
+      return true;
   }
-
-  uint32_t input_depth = conv_input->dim(3).value();
-  uint32_t input_height = conv_input->dim(1).value();
-  uint32_t input_width = conv_input->dim(2).value();
-
-  uint32_t output_height = compute_output_size(padding, input_height, filter_height, stride_height,
-                                               dilation_height_factor);
-  uint32_t output_width =
-    compute_output_size(padding, input_width, filter_width, stride_width, dilation_width_factor);
-
-  uint32_t batches = conv_input->dim(0).value();
-
-  return batches * output_height * output_width * input_depth * filter_height * filter_width *
-         size(conv_input->dtype());
 }
 
 } // namespace
 
-void ExecutionPlanner::get_execution_plan()
+void ExecutionPlanner::make_execution_plan()
 {
   get_default_execution_order_plan();
   _required_size = get_offsets_with_greedy_by_size();
@@ -106,23 +83,23 @@ void ExecutionPlanner::get_default_execution_order_plan()
 void ExecutionPlanner::get_usage_interval()
 {
   // Initialize vectors of first and last nodes for usage interval
-  _alloc_node.assign(_ordered_nodes.size(), nodeNotAssigned);
-  _dealloc_node.assign(_ordered_nodes.size(), nodeNotAssigned);
+  _alloc_node.assign(_ordered_nodes.size(), node_not_assigned);
+  _dealloc_node.assign(_ordered_nodes.size(), node_not_assigned);
 
   // Vector for count usages
   std::vector<int> usages_counts(_ordered_nodes.size(), 0);
 
   auto allocate = [this](uint32_t node, uint32_t tensor) {
-    if (_alloc_node[tensor] != nodeNotAssigned)
+    if (_alloc_node[tensor] != node_not_assigned)
     {
       return;
     }
-    assert(_dealloc_node[tensor] == nodeNotAssigned);
+    assert(_dealloc_node[tensor] == node_not_assigned);
     _alloc_node[tensor] = node;
   };
 
   auto deallocate = [this](uint32_t node, uint32_t tensor) {
-    assert(_dealloc_node[tensor] == nodeNotAssigned);
+    assert(_dealloc_node[tensor] == node_not_assigned);
     _dealloc_node[tensor] = node;
   };
 
@@ -158,13 +135,24 @@ void ExecutionPlanner::get_usage_interval()
   for (uint32_t i = 0; i < _ordered_nodes.size(); i++)
   {
     const auto node = _ordered_nodes.at(i);
+    auto prev_nodes = preds(node);
     if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
     {
       allocate(0, i);
     }
-    allocate(i, i);
+    else if (!isExecutableNode(loco::must_cast<luci::CircleNode *>(node)))
+    {
+      // If current node is multi output node than begin life time for current node should start
+      // when prev node start live
+      auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), *prev_nodes.begin());
+      size_t index = std::distance(_ordered_nodes.begin(), it);
+      allocate(index, i);
+    }
+    else
+    {
+      allocate(i, i);
+    }
 
-    auto prev_nodes = preds(node);
     for (auto &prev_node : prev_nodes)
     {
       auto it = std::find(_ordered_nodes.begin(), _ordered_nodes.end(), prev_node);
@@ -203,7 +191,7 @@ uint32_t ExecutionPlanner::get_offsets_with_greedy_by_size()
 uint32_t ExecutionPlanner::greedy_by_size_approach()
 {
   size_t result_size = 0;
-  create_alloc_node_inform_vector(false, false, false);
+  create_alloc_node_inform_vector(_is_null_consts, _is_null_inputs, _is_null_scratchpads);
   std::vector<AllocationNodeInformation> ordered_alloc_inform;
   for (auto &current_node : _alloc_node_inform_vector)
   {
@@ -250,22 +238,22 @@ uint32_t ExecutionPlanner::greedy_by_size_approach()
 }
 
 void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool null_inputs,
-                                                       bool null_im2col)
+                                                       bool null_scratchpad)
 {
   auto node_compare = [this](const AllocationNodeInformation &alloc_1,
                              const AllocationNodeInformation &alloc_2) {
     auto idx1 = alloc_1.node_num;
     auto idx2 = alloc_2.node_num;
 
-    if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == nodeNotAssigned)
+    if (this->_alloc_node[idx1] == 0 && this->_dealloc_node[idx1] == node_not_assigned)
     {
-      if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+      if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
       {
         return idx1 < idx2;
       }
       return true;
     }
-    if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == nodeNotAssigned)
+    if (this->_alloc_node[idx2] == 0 && this->_dealloc_node[idx2] == node_not_assigned)
     {
       return false;
     }
@@ -305,30 +293,66 @@ void ExecutionPlanner::create_alloc_node_inform_vector(bool null_consts, bool nu
     {
       _alloc_node_inform_vector[i].size = 0;
     }
+    else if (!isTensorProducingNode(circle_node))
+    {
+      _alloc_node_inform_vector[i].size = 0;
+    }
     else
     {
       _alloc_node_inform_vector[i].size = node_size;
     }
 
-    // Im2col
-    auto opcode = circle_node->opcode();
-    if (opcode == luci::CircleOpcode::CONV_2D)
+    // Scratchpad If needed
+    std::vector<uint32_t> scratchpad_sizes;
+    if (!null_scratchpad)
     {
-      auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
-      auto im2col_size = compute_im2col_size(conv);
-      if (im2col_size > 0)
+      switch (circle_node->opcode())
       {
-        AllocationNodeInformation temp_alloc;
-
-        if (null_im2col)
+        case luci::CircleOpcode::AVERAGE_POOL_2D:
         {
-          temp_alloc.size = 0;
+          const auto avg_pool = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+          scratchpad_sizes.push_back(
+            _scratchpad_helper->ComputeScratchpadSizeAveragePool2d(avg_pool));
+          break;
         }
-        else
+        case luci::CircleOpcode::BATCH_MATMUL:
         {
-          temp_alloc.size = im2col_size;
+          const auto batch_mat_mul = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+          scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeBatchMatMul(batch_mat_mul);
+          break;
         }
+        case luci::CircleOpcode::CONV_2D:
+        {
+          const auto conv = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+          scratchpad_sizes.push_back(_scratchpad_helper->ComputeScratchpadSizeConv2d(conv));
+          break;
+        }
+        case luci::CircleOpcode::DEPTHWISE_CONV_2D:
+        {
+          const auto depthwise_conv =
+            loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+          scratchpad_sizes.push_back(
+            _scratchpad_helper->ComputeScratchpadSizeDepthwiseConv2d(depthwise_conv));
+          break;
+        }
+        case luci::CircleOpcode::SVDF:
+        {
+          const auto svdf = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+          scratchpad_sizes = _scratchpad_helper->ComputeScratchpadSizeSVDF(svdf);
+          break;
+        }
+        default:
+          break;
+      }
+    }
+
+    for (const auto scratchpad_size : scratchpad_sizes)
+    {
+      if (scratchpad_size > 0)
+      {
+        AllocationNodeInformation temp_alloc;
 
+        temp_alloc.size = scratchpad_size;
         temp_alloc.first_node = i - 1;
         temp_alloc.last_node = i + 1;
         temp_alloc.node_num = i;
@@ -352,7 +376,7 @@ void ExecutionPlanner::dump_inform()
   {
     auto current_node_it = std::find_if(
       _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
-      [this, i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
+      [i](const AllocationNodeInformation &x) { return x.node_num == i && !x.is_temp; });
     for (uint32_t j = 0; j < _ordered_nodes.size(); j++)
     {
       auto first_node = _alloc_node[j];
@@ -360,7 +384,7 @@ void ExecutionPlanner::dump_inform()
 
       auto it = std::find_if(
         _alloc_node_inform_vector.begin(), _alloc_node_inform_vector.end(),
-        [this, j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
+        [j](const AllocationNodeInformation &x) { return x.node_num == j && !x.is_temp; });
       if (i >= first_node && i <= last_node)
       {
         current_node_it->breadth += it->size;
@@ -386,4 +410,4 @@ void ExecutionPlanner::dump_inform()
             });
 }
 
-} // namespace luci
+} // namespace circle_planner
index 8e3d9b46a1cd76cb066c87b7d098e35353eba6ba..e0833c407e080408997b990c0d71def0ff1fe918 100644 (file)
 #ifndef CIRCLE_EXECUTION_PLANNER_H
 #define CIRCLE_EXECUTION_PLANNER_H
 
+#include "TargetPlatform.h"
+#include "IScratchpadHelper.h"
+#include "ScratchpadHelperLinux.h"
+#include "ScratchpadHelperMCU.h"
+#include "ScratchpadHelperCMSISNN.h"
 #include <luci/IR/Module.h>
 #include <luci/Plan/CircleNodeExecutionPlan.h>
 
-namespace luci
+namespace circle_planner
 {
 // struct for additional information for the node. it helps build allocations plan for nodes.
 struct AllocationNodeInformation
@@ -50,7 +55,7 @@ struct AllocationNodeInformation
   uint32_t last_node;
   // is the current node temporary or not
   bool is_temp;
-  // operation breadth of current node
+  // Breadth is a sum of live tensors sizes at the moment of execution of given node
   uint32_t breadth;
 
   bool operator<(const AllocationNodeInformation &other) const { return offset < other.offset; }
@@ -60,12 +65,44 @@ class ExecutionPlanner
 {
 public:
   ExecutionPlanner() = delete;
-  explicit ExecutionPlanner(loco::Graph *graph) { _graph = graph; };
+  explicit ExecutionPlanner(loco::Graph *graph) : _graph(graph)
+  {
+    _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+  }
+
+  explicit ExecutionPlanner(loco::Graph *graph, TargetPlatform target_platform) : _graph(graph)
+  {
+    switch (target_platform.platform_type)
+    {
+      case LINUX:
+        _scratchpad_helper = std::make_unique<ScratchpadHelperLinux>();
+        break;
+      case MCU:
+        _scratchpad_helper = std::make_unique<ScratchpadHelperMCU>();
+        break;
+      case CMSISNN:
+        _scratchpad_helper = std::make_unique<ScratchpadHelperCMSISNN>(target_platform.use_dsp);
+        break;
+      default:
+        assert(false && "Use unsupported platform");
+    }
+  };
 
   // Method provides execution plan, which contains execution order and
   // memory offsets for all nodes in _graph.
   // This plan writes in nodes annotation information with help of CircleNodeExecutionPlan class.
-  void get_execution_plan();
+  void make_execution_plan();
+
+  // Method change planning mode:
+  // is_null_consts = true - constants are no longer taken into account when planning
+  // is_null_inputs = true - input are no longer taken into account when planning
+  // is_null_scratchpads = true - scratchpads are no longer taken into account when planning
+  void change_planning_mode(bool is_null_consts, bool is_null_inputs, bool is_null_scratchpads)
+  {
+    _is_null_consts = is_null_consts;
+    _is_null_inputs = is_null_inputs;
+    _is_null_scratchpads = is_null_scratchpads;
+  };
 
 private:
   // Method gets default execution order plan and saves it in _ordered_nodes vector.
@@ -83,18 +120,19 @@ private:
   // Return: required size of buffer.
   uint32_t get_offsets_with_greedy_by_size();
 
-  // Realization of greedy by size approach to find offsets for nodes.
+  // Realization of greedy by size approach (algorithm is mentioned in
+  // "EFFICIENT MEMORY MANAGEMENT FOR DEEP NEURAL NET INFERENCE" paper) to find offsets for nodes.
   uint32_t greedy_by_size_approach();
 
   // Method creates and fills _alloc_node_inform_vector with usage interval inform and node's sizes.
   // null_consts = true - size of const nodes will be equal 0;
   // null_inputs = true - size of input nodes will be equal 0;
-  // null_im2col = true - size of im2col nodes will be equal 0;
-  // It using if we don't want to take input(const or im2col) nodes into account
+  // null_scratchpad = true - size of scratchpad nodes will be equal 0;
+  // It using if we don't want to take input(const or scratchpads) nodes into account
   // when determining offsets and calculating the required buffer size. This is uses for
   // experiments.
   void create_alloc_node_inform_vector(bool null_consts = false, bool null_inputs = false,
-                                       bool null_im2col = false);
+                                       bool null_scratchpad = false);
 
   // Stores allocation additional information for the all nodes from _graph.
   std::vector<AllocationNodeInformation> _alloc_node_inform_vector;
@@ -121,10 +159,21 @@ private:
 
   loco::Graph *_graph;
 
+  // Calculate size of scratchpad tensors for current platform
+  std::unique_ptr<IScratchpadHelper> _scratchpad_helper;
+
   // Required memory size.
   uint32_t _required_size = 0;
+
+  // Flags for choosing different planning modes:
+  // _is_null_consts = true - constants are no longer taken into account when planning
+  // _is_null_inputs = true - input are no longer taken into account when planning
+  // _is_null_scratchpads = true - scratchpads are no longer taken into account when planning
+  bool _is_null_consts = false;
+  bool _is_null_inputs = false;
+  bool _is_null_scratchpads = false;
 };
 
-} // namespace luci
+} // namespace circle_planner
 
 #endif // CIRCLE_EXECUTION_PLANNER_H
index d0775ea2d1e8554aa60e7b18eed1e7f0f7b3a761..10d26d191b241e9f117305757f259c13122899ee 100644 (file)
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
   return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
 
 set(DRIVER "driver/Driver.cpp")
 
@@ -10,5 +10,6 @@ add_executable(circle-inspect ${DRIVER} ${SOURCES})
 target_include_directories(circle-inspect PRIVATE src)
 target_link_libraries(circle-inspect arser)
 target_link_libraries(circle-inspect foder)
-target_link_libraries(circle-inspect mio_circle)
+target_link_libraries(circle-inspect mio_circle04)
+target_link_libraries(circle-inspect mio_circle04_helper)
 target_link_libraries(circle-inspect safemain)
index 1f76c8ede30a4ad0e86fa663c16a5b370d44d62f..94eea7b080f352f6ba00ac08fcee23148ff26a51 100644 (file)
@@ -20,3 +20,19 @@ ADD
 ```
 
 To get the count of specific operator, use other tools like sort, uniq, etc.
+
+Operators with `--tensor_dtype`
+- show name and dtype of each tensor one line at a time
+
+Example
+```
+$ circle-inspect --tensor_dtype quantized_conv2d.circle
+```
+
+Result
+```
+ifm UINT8
+weights UINT8
+bias INT32
+ofm UINT8
+```
index a450fd9e00ce0c19f7fae59c6316e529bc5fb3fb..10e185de5207e812ed1ae3fe13e772f1f39fad8f 100644 (file)
@@ -35,6 +35,7 @@ int entry(int argc, char **argv)
     .nargs(0)
     .help("Dump Conv2D series weight operators in circle file");
   arser.add_argument("--op_version").nargs(0).help("Dump versions of the operators in circle file");
+  arser.add_argument("--tensor_dtype").nargs(0).help("Dump dtype of tensors");
   arser.add_argument("circle").type(arser::DataType::STR).help("Circle file to inspect");
 
   try
@@ -48,7 +49,8 @@ int entry(int argc, char **argv)
     return 255;
   }
 
-  if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"])
+  if (!arser["--operators"] && !arser["--conv2d_weight"] && !arser["--op_version"] &&
+      !arser["--tensor_dtype"])
   {
     std::cout << "At least one option must be specified" << std::endl;
     std::cout << arser;
@@ -63,6 +65,8 @@ int entry(int argc, char **argv)
     dumps.push_back(std::make_unique<circleinspect::DumpConv2DWeight>());
   if (arser["--op_version"])
     dumps.push_back(std::make_unique<circleinspect::DumpOperatorVersion>());
+  if (arser["--tensor_dtype"])
+    dumps.push_back(std::make_unique<circleinspect::DumpTensorDType>());
 
   std::string model_file = arser.get<std::string>("circle");
 
index 81e0f0dbdba66eb34caf0fdbd0fb8b7bf3d46b40..362d67cf4a08563dc939bcd12f64249878d23723 100644 (file)
@@ -1,3 +1,3 @@
 require("arser")
-require("mio-circle")
+require("mio-circle04")
 require("safemain")
index 5c71afb3fe8956244be49796b7b8adba8179b00d..bba5e56c3bf13fd330a1696abc10b6cd4c574e83 100644 (file)
@@ -175,3 +175,28 @@ void DumpOperatorVersion::run(std::ostream &os, const circle::Model *model)
 }
 
 } // namespace circleinspect
+
+namespace circleinspect
+{
+
+void DumpTensorDType::run(std::ostream &os, const circle::Model *model)
+{
+  circleinspect::Reader reader(model);
+
+  const uint32_t subgraph_size = reader.num_subgraph();
+
+  for (uint32_t g = 0; g < subgraph_size; g++)
+  {
+    reader.select_subgraph(g);
+    auto tensors = reader.tensors();
+
+    for (uint32_t i = 0; i < tensors->Length(); ++i)
+    {
+      const auto tensor = tensors->Get(i);
+
+      os << reader.tensor_name(tensor) << " " << reader.tensor_dtype(tensor) << std::endl;
+    }
+  }
+}
+
+} // namespace circleinspect
index 996c421f92b1bd3398a5220db5f229a5cdfee308..8ca6838d1972284075a41da4502f3a562df4ee38 100644 (file)
@@ -60,6 +60,15 @@ public:
   void run(std::ostream &os, const circle::Model *model);
 };
 
+class DumpTensorDType final : public DumpInterface
+{
+public:
+  DumpTensorDType() = default;
+
+public:
+  void run(std::ostream &os, const circle::Model *model);
+};
+
 } // namespace circleinspect
 
 #endif // __DUMP_H__
index 7807db38a4eaa628ffdb11faf75f7b65634f37d9..0e2865254aea52db0e791b378d344aefdd003e56 100644 (file)
 
 #include "Reader.h"
 
+#include <mio_circle/Helper.h>
+
 #include <sstream>
 #include <string>
 
 namespace circleinspect
 {
 
-bool is_valid(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
-  assert(opcode);
-
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    if (!opcode->custom_code())
-      return "(invalid custom)";
-
-    std::string custom_op = "CUSTOM(";
-    custom_op += opcode->custom_code()->c_str();
-    custom_op += ")";
-    return custom_op;
-  }
-
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
-  return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
-  static const char *kEmptyTensorName = "(noname)";
-
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-
-  return kEmptyTensorName;
-}
-
 Reader::Reader(const circle::Model *model)
 {
   _subgraphs = model->subgraphs();
@@ -122,7 +70,7 @@ circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  return opcode->builtin_code();
+  return mio::circle::builtin_code_neutral(opcode);
 }
 
 std::string Reader::opcode_name(const circle::Operator *op) const
@@ -131,14 +79,24 @@ std::string Reader::opcode_name(const circle::Operator *op) const
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::circle::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  return circleinspect::opcode_name(opcode);
+  return mio::circle::opcode_name(opcode);
+}
+
+std::string Reader::tensor_name(const circle::Tensor *tensor) const
+{
+  return mio::circle::tensor_name(tensor);
+}
+
+std::string Reader::tensor_dtype(const circle::Tensor *tensor) const
+{
+  return mio::circle::tensor_type(tensor);
 }
 
 bool Reader::select_subgraph(uint32_t sgindex)
index b5a99df3f132fc64dfe9c6631b38ef36bd0219ac..c38ec3990fb7d75db9ccf4ea1ebc81b25f9048bd 100644 (file)
@@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
 /**
  * @brief Loads Circle file and provides helpers to access attributes
  */
@@ -71,6 +65,8 @@ public:
   size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data);
   circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
   std::string opcode_name(const circle::Operator *op) const;
+  std::string tensor_name(const circle::Tensor *tensor) const;
+  std::string tensor_dtype(const circle::Tensor *tensor) const;
 
 public:
   bool select_subgraph(uint32_t subgraph);
index c06899ab511d7a6ccd58da97df63f32fa0bebe11..5ea2d32c49d85449389345a629d5d4d5298e8d5e 100644 (file)
@@ -1,21 +1,21 @@
-# circle-opselector\r
-\r
-`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.\r
-\r
-## Example\r
-\r
-### 1. Select from location numbers\r
-\r
-```bash\r
-./circle-opselector --by_id "1-3,5" input.circle output.circle\r
-```\r
-\r
-Then, output.circle which has node 1, 2, 3 and 5 will be created.\r
-\r
-### 2. Select from node names\r
-\r
-```bash\r
-./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle\r
-```\r
-\r
-Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created.\r
+# circle-opselector
+
+`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.
+
+## Example
+
+### 1. Select from location numbers
+
+```bash
+./circle-opselector --by_id "1-3,5" input.circle output.circle
+```
+
+Then, output.circle which has node 1, 2, 3 and 5 will be created.
+
+### 2. Select from node names
+
+```bash
+./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle
+```
+
+Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created.
index 1cfbcbd9bbddcf55416c61e6c8afc66c7944cca6..0657607d25e89e112759acf40c5186eca9f91698 100644 (file)
@@ -82,8 +82,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
 
   # Run partitioner
   add_custom_command(OUTPUT ${PARTITIONER_CONN_JSON}
-    COMMAND circle_partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
-    DEPENDS circle_partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
+    COMMAND circle-partitioner "${PART_FILE}" "${PARTITION_NAME}.circle" "${PARTITIONER_OUTPUT_PATH}"
+    DEPENDS circle-partitioner ${PART_DST_PATH} ${CIRCLE_DST_PATH}
     COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
   )
   list(APPEND TEST_DEPS ${PARTITIONER_CONN_JSON})
@@ -106,7 +106,7 @@ add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
 add_test(NAME circle_part_value_test
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
           "${CMAKE_CURRENT_BINARY_DIR}"
-          "${NNCC_OVERLAY_DIR}/venv_2_6_0"
+          "${NNCC_OVERLAY_DIR}/venv_2_8_0"
           "$<TARGET_FILE:circle_part_driver>"
           ${PARTITION_LIST}
 )
index 91e32d78f296c225058d0ba41e27eadbbb258e14..44661c78b4f39dc12d7572f1e743edb0829f960c 100755 (executable)
@@ -53,21 +53,37 @@ except:
 interpreter = tf.lite.Interpreter(tflite_model)
 interpreter.allocate_tensors()
 
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures != None:
+    signature_serving_default = full_signatures.get('serving_default', None)
+    if signature_serving_default != None:
+        signature_outputs = signature_serving_default['outputs']
+
+        full_signatures_outputs_remap = []
+        for index, (key, value) in enumerate(signature_outputs.items()):
+            full_signatures_outputs_remap.append(value)
+
 # Generate random input data.
 num_inputs = len(interpreter.get_input_details())
 for i in range(num_inputs):
     input_details = interpreter.get_input_details()[i]
-    if input_details["dtype"] == np.float32:
+    input_details_dtype = input_details["dtype"]
+    input_details_shape = input_details["shape"]
+    if input_details_dtype == np.float32:
         input_data = np.array(
-            np.random.random_sample(input_details["shape"]), input_details["dtype"])
-    elif input_details["dtype"] == np.uint8:
+            np.random.random_sample(input_details_shape), input_details_dtype)
+    elif input_details_dtype == np.int16:
         input_data = np.array(
-            np.random.randint(0, 256, size=input_details["shape"]),
-            input_details["dtype"])
-    elif input_details["dtype"] == np.bool_:
+            np.random.randint(0, 100, size=input_details_shape), input_details_dtype)
+    elif input_details_dtype == np.uint8:
         input_data = np.array(
-            np.random.choice(a=[True, False], size=input_details["shape"]),
-            input_details["dtype"])
+            np.random.randint(0, 256, size=input_details_shape), input_details_dtype)
+    elif input_details_dtype == np.bool_:
+        input_data = np.array(
+            np.random.choice(a=[True, False], size=input_details_shape),
+            input_details_dtype)
     else:
         raise SystemExit("Unsupported input dtype")
 
@@ -90,52 +106,42 @@ print("", flush=True)
 subprocess.run(partition_command, check=True)
 
 # Compare the results.
-for idx in range(len(interpreter.get_output_details())):
-    output_details = interpreter.get_output_details()[idx]
-    output_data = np.fromfile(circle_model + ".output" + str(idx),
-                              output_details["dtype"])
+inpt_output_details = interpreter.get_output_details()
+for idx in range(len(inpt_output_details)):
+    output_details = inpt_output_details[idx]
+    output_dtype = output_details["dtype"]
+    output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype)
     shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
     output_shape = [int(i) for i in shape_file.read().split(',')]
     luci_output_data = np.reshape(output_data, output_shape)
+    output_tensor = output_details["index"]
+    if full_signatures_outputs_remap != None:
+        output_tensor = full_signatures_outputs_remap[idx]
+    intp_output_data = interpreter.get_tensor(output_tensor)
     try:
-        if output_details["dtype"] == np.uint8:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+        if output_dtype == np.uint8:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
-        elif output_details["dtype"] == np.float32:
+        elif output_dtype == np.float32:
             if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=1.e-5,
-                    atol=1.e-5) == False:
+                    luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
-        elif output_details["dtype"] == np.int64:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+        elif output_dtype == np.int64:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
-        elif output_details["dtype"] == np.int32:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+        elif output_dtype == np.int32:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + tflite_model +
+                                 " does not match with " + circle_model)
+        elif output_dtype == np.int16:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         else:
-            raise SystemExit("Unsupported data type: ", output_details["dtype"])
+            raise SystemExit("Unsupported data type: ", output_dtype)
     except:
         print(traceback.format_exc())
         quit(255)
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.001.part
new file mode 100644 (file)
index 0000000..496971e
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=npu
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.002.part
new file mode 100644 (file)
index 0000000..9913fea
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+UNPACK=npu
diff --git a/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part b/compiler/circle-part-value-test/parts/Net_UnpackAdd_001.part
new file mode 100644 (file)
index 0000000..c63efc5
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+UNPACK=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_000.part
new file mode 100644 (file)
index 0000000..ad08421
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+MUL=npu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_001.part
new file mode 100644 (file)
index 0000000..c82b741
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+SQRT=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part b/compiler/circle-part-value-test/parts/Part_Mul_Sqrt_FC_nobias_000_002.part
new file mode 100644 (file)
index 0000000..d9d2a8e
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Split_Add_000.part b/compiler/circle-part-value-test/parts/Part_Split_Add_000.part
new file mode 100644 (file)
index 0000000..91af566
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+SPLIT=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias.part
new file mode 100644 (file)
index 0000000..d4d439d
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+DIV=acl_cl
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_001.part
new file mode 100644 (file)
index 0000000..dbd174e
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+TANH=cpu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_002.part
new file mode 100644 (file)
index 0000000..475439a
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=cpu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=npu
diff --git a/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part b/compiler/circle-part-value-test/parts/Part_Tanh_FC_nobias_003.part
new file mode 100644 (file)
index 0000000..d9d2a8e
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,npu
+default=npu
+comply=opcode
+
+[OPCODE]
+FULLY_CONNECTED=cpu
diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_000.part
new file mode 100644 (file)
index 0000000..e469eeb
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
diff --git a/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part b/compiler/circle-part-value-test/parts/SignatureDef_MultiOut_001.part
new file mode 100644 (file)
index 0000000..e469eeb
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+MAXIMUM=acl_cl
index af2f5ba5cb328e45dfdf2c13fa907dae2f43728e..b7a3f403acd1d817d13278cf3516a66e38f301cc 100644 (file)
@@ -35,3 +35,24 @@ add(Part_If_Add_Sub_001 Part_If_Add_Sub_001.001 3)
 # WHILE with subgraphs
 add(Part_While_000 Part_While_000 3)
 add(Part_While_001 Part_While_001 3)
+
+# UNPACK with multiple outputs
+add(Net_UnpackAdd_001 Net_UnpackAdd_001 2)
+add(Net_UnpackAdd_001 Net_UnpackAdd_001.001 2)
+add(Net_UnpackAdd_001 Net_UnpackAdd_001.002 2)
+
+# Other multiple outputs
+add(Part_Split_Add_000 Part_Split_Add_000 2)
+
+# test SignatureDef, with any OPCODE
+add(SignatureDef_MultiOut_000 SignatureDef_MultiOut_000 0)
+add(SignatureDef_MultiOut_001 SignatureDef_MultiOut_001 0)
+
+# FC with nobias
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias 1)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_001 2)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_002 2)
+add(Part_Tanh_FC_nobias Part_Tanh_FC_nobias_003 2)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_000 0)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_001 0)
+add(Part_Mul_Sqrt_FC_nobias_000 Part_Mul_Sqrt_FC_nobias_000_002 0)
index ed8c97948761b39ccd5a9cc1c80eb11d2c3e281c..e29a66b41e0ab1d2238eb2b7e919e76953905c4d 100644 (file)
@@ -57,8 +57,8 @@ foreach(IDX RANGE ${RECIPE_LENGTH_M1})
   # Run partitioner
   set(PART_CONN_JSON "${PART_OUT_PATH}/${PART_NAME}.conn.json")
   add_custom_command(OUTPUT ${PART_CONN_JSON}
-    COMMAND circle_partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}"
-    DEPENDS circle_partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
+    COMMAND circle-partitioner "${PART_FILE}" "${PART_NAME}.circle" "${PART_OUT_PATH}"
+    DEPENDS circle-partitioner ${CIRCLE_DST_PATH} ${PART_DST_PATH}
     COMMENT "Parition ${RECIPE_NAME}.circle with ${PART_FILE}"
   )
   # NOTE this is checked in build time and not added with 'add_test' command
diff --git a/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part b/compiler/circle-partitioner-test/parts/Part_Add_SVDF_000.part
new file mode 100644 (file)
index 0000000..01b8c70
--- /dev/null
@@ -0,0 +1,7 @@
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
index b731f8d0eaee09a54c1abf674cc9ce31c706b816..c0c185c7e05d7c50a6a01e68090c5abacf8e833a 100644 (file)
@@ -5,3 +5,7 @@
 # add(RECIPE_NAME PART_NAME)
 
 add(Net_InstanceNorm_003 Net_InstanceNorm_003)
+
+# NOTE SVDF partition test is done here as value test may need custom tolerance
+# TODO move Part_Add_SVDF_000 to circle-part-value-test when ready
+add(Part_Add_SVDF_000 Part_Add_SVDF_000)
index 28a16c9fc9fefd3ec566830de07e96410ec6288b..9b8f5afae09ddf714c4e8f59c4aa4696fedbf38d 100644 (file)
@@ -1,5 +1,24 @@
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
+add_executable(circle-partitioner "${SOURCES}")
+target_link_libraries(circle-partitioner foder)
+target_link_libraries(circle-partitioner crew)
+target_link_libraries(circle-partitioner safemain)
+target_link_libraries(circle-partitioner luci_lang)
+target_link_libraries(circle-partitioner luci_log)
+target_link_libraries(circle-partitioner luci_import)
+target_link_libraries(circle-partitioner luci_service)
+target_link_libraries(circle-partitioner luci_pass)
+target_link_libraries(circle-partitioner luci_export)
+target_link_libraries(circle-partitioner luci_partition)
+target_link_libraries(circle-partitioner arser)
+target_link_libraries(circle-partitioner pepper_csv2vec)
+target_link_libraries(circle-partitioner vconone)
+target_link_libraries(circle-partitioner nncc_common)
+
+install(TARGETS circle-partitioner DESTINATION bin)
+
+# TODO remove circle_partitioner
 add_executable(circle_partitioner "${SOURCES}")
 target_link_libraries(circle_partitioner foder)
 target_link_libraries(circle_partitioner crew)
index 5fd312e332219fc0eeb38cc05a8c1e31decbb115..2e0a98638abaaacc2c4725c0ebaa978e7dc0a286 100644 (file)
@@ -94,7 +94,7 @@ Net_InstanceNorm_003/
 
 Command example
 ```
-./circle_partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003
+./circle-partitioner Net_InstanceNorm_003.part Net_InstanceNorm_003.circle Net_InstanceNorm_003
 ```
 
 Result of _circle-partitioner_
@@ -163,3 +163,131 @@ as the `source` model: `[ "Input" ]`.
 `Net_InstanceNorm_003.00002_acl_cl.circle` which they should be connected.
 - And `outputs` `[ "Div" ]` should be connected to `inputs` of
 third model `Net_InstanceNorm_003.00003_cpu.circle`.
+
+### Execution example
+
+Consider partitioning with backends of OneRT
+- `cpu`, `acl_cl`, `acl_neon`, `ruy`, `xnnpack`
+
+Let's try with this command:
+```
+circle-partitioner \
+   --partition Net_InstanceNorm_003.part \
+   --backends cpu,acl_cl \
+   --default cpu \
+   Net_InstanceNorm_003.circle Net_InstanceNorm_003
+```
+
+where `Net_InstanceNorm_003.part` is like this for initial design
+```
+[partition]
+backends=cpu,acl_cl
+default=cpu
+comply=opcode
+
+[OPCODE]
+ADD=acl_cl
+```
+where in `[partition]` section,
+- `backends` is available backends and can be overridden by `--backends`
+- `default` is the default backend for OpCodes not assigned in `[OPCODE]` section, and can be overridden by `--default`
+- `comply` is which rule to apply, where only `opcode` is available for now
+
+#### Use Op name to assign backend
+
+```
+[OP]
+Reduction_indices=GPU
+```
+- there are very long names that may be inconvenient
+
+### Partitioned output
+
+#### Output files
+
+After partition is applied, output files will look something like these
+- `Net_InstanceNorm_003.part.00001_cpu.circle`
+- `Net_InstanceNorm_003.part.00002_acl_cl.circle`
+- `Net_InstanceNorm_003.part.00003_cpu.circle`
+- `Net_InstanceNorm_003.part.conn.ini`
+- `Net_InstanceNorm_003.part.conn.json`
+
+Assume only `Div` node is assigned to `acl_cl`
+
+#### Connection information of partitioned circle files
+
+##### Format with ini
+- `Net_InstanceNorm_003.conn.ini` provides connection of each circle files.
+```
+[source]
+file=Net_InstanceNorm_003.circle
+i1=Input
+o1=Add_as_terminal
+
+[models]
+m1=Net_InstanceNorm_003.part.00001_cpu.circle
+m2=Net_InstanceNorm_003.part.00002_acl_cl.circle
+m3=Net_InstanceNorm_003.part.00003_cpu.circle
+
+[Net_InstanceNorm_003.part.00001_cpu.circle]
+file=Net_InstanceNorm_003.part.00001_cpu.circle
+i1=Input
+o1=Pow
+o2=Sub
+
+[Net_InstanceNorm_003.part.00002_acl_cl.circle]
+file=Net_InstanceNorm_003.part.00002_acl_cl.circle
+i1=Sub
+i2=Pow
+o1=Div
+
+[Net_InstanceNorm_003.part.00003_cpu.circle]
+file=Net_InstanceNorm_003.part.00003_cpu.circle
+i1=Div
+o1=Add_as_terminal
+```
+
+Predefined section
+- `source`: Source circle model information. Has `file` as filename, `iN` for inputs and `oN` for outputs.
+- `models`: Partitioned circle models. Has `mN` for model filename.
+
+Partitioned Model section
+- `iN`: inputs of this model
+- `oN`: outputs of this model
+
+In graph diagram, output order of `Net_InstanceNorm_003.part.00001_cpu.circle`
+looks like `Pow,Sub` but `Div` Op in `Net_InstanceNorm_003.part.00002_acl_cl.circle`
+requires order of `Sub,Pow`.
+
+##### Format with JSON
+- Use JSON format, `Net_InstanceNorm_003.part.conn.json`
+```json
+{
+  "source" : {
+    "file" : "Net_InstanceNorm_003.circle",
+    "inputs" : [ "Input" ],
+    "outputs" : [ "Add_as_terminal" ]
+  },
+  "parts" : [
+    {
+      "file" : "Net_InstanceNorm_003.part.00001_cpu.circle",
+      "inputs" : [ "Input" ],
+      "outputs" : [ "Pow", "Sub" ],
+    },
+    {
+      "file" : "Net_InstanceNorm_003.part.00002_acl_cl.circle",
+      "inputs" : [ "Pow", "Sub" ],
+      "outputs" : [ "Div" ]
+    },
+    {
+      "file" : "Net_InstanceNorm_003.part.00003_cpu.circle",
+      "inputs" : [ "Div" ],
+      "outputs" : [ "Add_as_terminal" ]
+    }
+  ]
+}
+```
+
+### Future works
+
+How to partition with multiple inputs?
diff --git a/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt b/compiler/circle-quantizer-dredd-recipe-test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..5ec8b6e
--- /dev/null
@@ -0,0 +1,144 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_include(TargetRequire)
+
+unset(REQUIRED_TARGETS)
+list(APPEND REQUIRED_TARGETS circle-inspect)
+list(APPEND REQUIRED_TARGETS circle-verify)
+list(APPEND REQUIRED_TARGETS circle-quantizer)
+list(APPEND REQUIRED_TARGETS record-minmax)
+list(APPEND REQUIRED_TARGETS dredd_rule_lib)
+TargetRequire_Return(${REQUIRED_TARGETS})
+
+unset(TEST_DEPS)
+unset(TEST_NAMES)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+set(options USE_QCONFIG)
+set(oneValueArgs DTYPE GRANULARITY)
+set(multiValueArgs "")
+
+macro(Add RECIPE)
+  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  set(QCONFIG_OPT "")
+  if(ARG_USE_QCONFIG)
+    set(QCONFIG_OPT "--config" "${ARTIFACTS_BIN_PATH}/${RECIPE}.qconf.json")
+  endif()
+
+  set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+  set(FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.fq.circle")
+  set(RECORDED_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.recorded.circle")
+  set(QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+  # Generate quantized .circle
+  add_custom_command(OUTPUT ${QUANT_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:circle-quantizer> --quantize_dequantize_weights float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${CIRCLE_PATH} ${FAKE_QUANT_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:record-minmax> --input_model ${FAKE_QUANT_CIRCLE_PATH} --output_model ${RECORDED_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:circle-quantizer> --quantize_with_minmax float32 ${ARG_DTYPE} ${ARG_GRANULARITY} ${QCONFIG_OPT} ${RECORDED_CIRCLE_PATH} ${QUANT_CIRCLE_PATH}
+    DEPENDS 
+      circle-quantizer
+      record-minmax
+      ${CIRCLE_PATH}
+    COMMENT "Generate ${RECIPE}.q.circle"
+  )
+
+  list(APPEND TEST_DEPS ${QUANT_CIRCLE_PATH})
+  list(APPEND TEST_NAMES ${RECIPE})
+endmacro(Add)
+
+# Macro to generate fully fake-quantized models
+macro(AddFakeQuant RECIPE)
+  set(CIRCLE_PATH "${ARTIFACTS_BIN_PATH}/${RECIPE}.circle")
+  # NOTE We use .q.circle because it is convention for output file (see testall.sh for more details)
+  set(FULL_FAKE_QUANT_CIRCLE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE}.q.circle")
+
+  # Generate fully fake-quantized .circle
+  add_custom_command(OUTPUT ${FULL_FAKE_QUANT_CIRCLE_PATH}
+    COMMAND $<TARGET_FILE:circle-quantizer> --fake_quantize ${CIRCLE_PATH} ${FULL_FAKE_QUANT_CIRCLE_PATH}
+    DEPENDS
+      circle-quantizer
+      ${CIRCLE_PATH}
+    COMMENT "Generate ${RECIPE}.q.circle"
+  )
+
+  list(APPEND TEST_DEPS ${FULL_FAKE_QUANT_CIRCLE_PATH})
+  list(APPEND TEST_NAMES ${RECIPE})
+endmacro(AddFakeQuant)
+
+# Read "test.lst"
+include("test.lst")
+
+##
+## Copy testall
+##
+set(TEST_RUNNER "${CMAKE_CURRENT_BINARY_DIR}/testall.sh")
+set(TEST_RUNNER_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh")
+
+add_custom_command(
+  OUTPUT ${TEST_RUNNER}
+  COMMAND ${CMAKE_COMMAND} -E copy "${TEST_RUNNER_SOURCE}" "${TEST_RUNNER}"
+  DEPENDS ${TEST_RUNNER_SOURCE}
+  COMMENT "Generate test runner"
+)
+
+list(APPEND TEST_DEPS "${TEST_RUNNER}")
+
+###
+### Generate test.config
+###
+set(TEST_CONFIG "${CMAKE_CURRENT_BINARY_DIR}/test.config")
+
+add_custom_command(
+  OUTPUT ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_INSPECT_PATH=\"$<TARGET_FILE:circle-inspect>\"' >> ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_VERIFY_PATH=\"$<TARGET_FILE:circle-verify>\"' >> ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'RECORD_MINMAX_PATH=\"$<TARGET_FILE:record-minmax>\"' >> ${TEST_CONFIG}
+  COMMAND ${CMAKE_COMMAND} -E echo 'CIRCLE_QUANTIZER_PATH=\"$<TARGET_FILE:circle-quantizer>\"' >> ${TEST_CONFIG}
+  DEPENDS
+    circle-inspect
+    circle-verify
+    record-minmax
+    circle-quantizer
+  COMMENT "Generate test configuration"
+)
+
+list(APPEND TEST_DEPS "${TEST_CONFIG}")
+
+#
+# copy rule-lib.sh (a library of shell script functions)
+#
+
+# getting path for rule-lib.sh in dredd-rule-lib
+get_target_property(DREDD_RULE_LIB_DIR dredd_rule_lib BINARY_DIR)
+
+set(RULE_LIB_SOURCE_PATH "${DREDD_RULE_LIB_DIR}/rule-lib.sh")
+set(RULE_LIB_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/rule-lib.sh")
+
+add_custom_command(
+  OUTPUT ${RULE_LIB_BINARY_PATH}
+  COMMAND ${CMAKE_COMMAND} -E copy "${RULE_LIB_SOURCE_PATH}" "${RULE_LIB_BINARY_PATH}"
+  DEPENDS ${RULE_LIB_SOURCE_PATH}
+  COMMENT "Generate rule lib"
+)
+
+list(APPEND TEST_DEPS "${RULE_LIB_BINARY_PATH}")
+
+# Generate dependencies
+add_custom_target(circle_quantizer_dredd_recipe_test ALL DEPENDS ${TEST_DEPS})
+add_dependencies(circle_quantizer_dredd_recipe_test common_artifacts_deps)
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+# Run tests
+add_test(
+  NAME circle_quantizer_dredd_recipe_test
+  COMMAND ${TEST_RUNNER}
+          ${TEST_CONFIG}
+          ${ARTIFACTS_BIN_PATH}
+          ${TEST_NAMES}
+)
diff --git a/compiler/circle-quantizer-dredd-recipe-test/README.md b/compiler/circle-quantizer-dredd-recipe-test/README.md
new file mode 100644 (file)
index 0000000..6152549
--- /dev/null
@@ -0,0 +1,37 @@
+# circle-quantizer-dredd-recipe-test
+
+It tests non-functional conditions of a quantized circle model generated by circle-quantizer.
+
+## How to add a test?
+
+1. Create a directory under `res/TensorFlowLiteRecipes/` or `res/CircleRecipes/`.
+
+2. Make a recipe (`test.recipe`) for fp32 model under the directory.
+
+3. Make a rule (`test.rule`) you want to test under the directory. (For more information on dredd-test-rules, see _dredd-rule-lib_ module.)
+
+4. Add test to `test.lst` in this module with `Add` macro.
+
+```
+Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG)
+```
+
+- `RECIPE_DIR`: Path to the directory where the recipe file is saved.
+- `DTYPE`: Default quantization dtype (uint8, int16)
+- `GRANULARITY`: Quantization granularity (channel, layer)
+- `USE_QCONFIG`: (Optional) Whether to use a quantization configuration file or not. If this is set, `test.qconf.json` should exist under `RECIPE_DIR`
+
+## Example
+
+```
+# TensorFlowLiteRecipes
+res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000
+├── test.recipe     # What you want to test
+├── test.rule       # Non-functional conditions to be satisfied
+└── test.qconf.json # Quantization configuration file (optional)
+
+# test.lst
+...
+Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+...
+```
diff --git a/compiler/circle-quantizer-dredd-recipe-test/requires.cmake b/compiler/circle-quantizer-dredd-recipe-test/requires.cmake
new file mode 100644 (file)
index 0000000..7450f73
--- /dev/null
@@ -0,0 +1,6 @@
+require("circle-quantizer")
+require("record-minmax")
+require("circle-inspect")
+require("circle-verify")
+require("common-artifacts")
+require("dredd-rule-lib")
diff --git a/compiler/circle-quantizer-dredd-recipe-test/test.lst b/compiler/circle-quantizer-dredd-recipe-test/test.lst
new file mode 100644 (file)
index 0000000..1881030
--- /dev/null
@@ -0,0 +1,15 @@
+## EXAMPLE
+#
+# Add(RECIPE_DIR DTYPE dtype GRANULARITY granularity USE_QCONFIG(optional))
+# AddFakeQuant(RECIPE_DIR)
+#
+
+## TFLITE RECIPE
+
+Add(Quant_Conv_Mul_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_Mul_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Conv_Mul_Add_002 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Split_Add_000 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+Add(Quant_Split_Add_001 DTYPE uint8 GRANULARITY channel USE_QCONFIG)
+
+AddFakeQuant(Quant_Add_000)
diff --git a/compiler/circle-quantizer-dredd-recipe-test/testall.sh b/compiler/circle-quantizer-dredd-recipe-test/testall.sh
new file mode 100755 (executable)
index 0000000..e5d5cf2
--- /dev/null
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Need at least 2 arguments
+if [[ $# -lt 2 ]]; then
+  echo "USAGE: $0 ..."
+  echo
+  echo "ARGUMENTS:"
+  echo "  [test.config path]"
+  echo "  [WORKDIR]"
+  echo "  [Prefix1]"
+  echo "  [Prefix2]"
+  echo "  ..."
+  exit 255
+fi
+
+WORKDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+CONFIG_PATH="$1"; shift
+RESOURCE_DIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found circle-inspect: ${CIRCLE_INSPECT_PATH}"
+echo "-- Found circle-verify: ${CIRCLE_VERIFY_PATH}"
+echo "-- Found circle-quantizer: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found record-minmax: ${RECORD_MINMAX_PATH}"
+echo "-- Found common-artifacts: ${RESOURCE_DIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd ${WORKDIR}
+while [[ $# -ne 0 ]]; do
+  PREFIX="$1"; shift
+
+  TESTED+=("${PREFIX}")
+
+  PASSED_TAG="${PREFIX}.passed"
+
+  rm -f "${PASSED_TAG}"
+
+  cat > "${PREFIX}.log" <(
+    exec 2>&1
+
+    echo "-- Found circle: ${PREFIX}.q.circle"
+
+    # Exit immediately if any command fails
+    set -e
+    # Show commands
+    set -x
+
+    #
+    # Check if rule is satisfied
+    #
+
+    # Note: turn off 'command printing'. Otherwise printing will be so messy
+    set +x
+
+    # (COMPILED_FILE, INSPECT_PROG_PATH, VERIFY_PROG_PATH, ERROR_LOG) must be set for rule-lib.sh
+    COMPILED_FILE="${PREFIX}.q.circle"
+    INSPECT_PROG_PATH=${CIRCLE_INSPECT_PATH}
+    VERIFY_PROG_PATH=${CIRCLE_VERIFY_PATH}
+    ERROR_LOG="${PREFIX}.error"
+
+    rm -f "${ERROR_LOG}"
+
+    # in case error while running rule-lib.sh, prints error msg
+    trap 'echo "** ERROR **" ; cat "${ERROR_LOG}"' ERR
+
+    source rule-lib.sh
+    source "${RESOURCE_DIR}/${PREFIX}.rule"
+
+    # unset
+    trap - ERR
+    set -x
+
+    # At this point, the exit code of all commands is 0
+    # If not 0, execution of this script ends because of "set -e"
+    touch "${PASSED_TAG}"
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$PREFIX")
+  else
+    FAILED+=("$PREFIX")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
index a5f5f61c426b7f552bb04ff5182b9d6da9fe78f9..14e00972b99b2ad39aa3bc39b28766bcd0b00831 100644 (file)
@@ -1,11 +1,19 @@
+nnas_find_package(Jsoncpp)
+if(NOT Jsoncpp_FOUND)
+  message(STATUS "Build jsoncpp: FAILED (missing jsoncpp)")
+  return()
+endif(NOT Jsoncpp_FOUND)
+
 set (SOURCES src/CircleQuantizer.cpp)
 
 add_executable(circle-quantizer "${SOURCES}")
+target_include_directories(circle-quantizer PRIVATE ${Jsoncpp_INCLUDE_DIRS})
+
+target_link_libraries(circle-quantizer ${Jsoncpp_STATIC_LIB})
 target_link_libraries(circle-quantizer foder)
 target_link_libraries(circle-quantizer safemain)
 target_link_libraries(circle-quantizer oops)
 target_link_libraries(circle-quantizer loco)
-target_link_libraries(circle-quantizer mio_circle)
 target_link_libraries(circle-quantizer luci_import)
 target_link_libraries(circle-quantizer luci_service)
 target_link_libraries(circle-quantizer luci_pass)
index 57ac30a871947e5fa2f9eba55347e2a1837899b8..e0c85cb6e88d32b10f3100137f3b8fb1c0872710 100644 (file)
@@ -17,7 +17,7 @@
 #include <foder/FileLoader.h>
 
 #include <luci/Importer.h>
-#include <luci/CircleOptimizer.h>
+#include <luci/CircleQuantizer.h>
 #include <luci/Service/Validate.h>
 #include <luci/CircleExporter.h>
 #include <luci/CircleFileExpContract.h>
@@ -26,6 +26,7 @@
 #include <oops/InternalExn.h>
 #include <arser/arser.h>
 #include <vconone/vconone.h>
+#include <json.h>
 
 #include <functional>
 #include <iostream>
 
 using OptionHook = std::function<int(const char **)>;
 
-using Algorithms = luci::CircleOptimizer::Options::Algorithm;
-using AlgorithmParameters = luci::CircleOptimizer::Options::AlgorithmParameters;
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+
+std::vector<std::shared_ptr<LayerParam>> read_layer_params(std::string &filename)
+{
+  Json::Value root;
+  std::ifstream ifs(filename);
+
+  // Failed to open cfg file
+  if (not ifs.is_open())
+    throw std::runtime_error("Cannot open config file. " + filename);
+
+  Json::CharReaderBuilder builder;
+  JSONCPP_STRING errs;
+
+  // Failed to parse
+  if (not parseFromStream(builder, ifs, &root, &errs))
+    throw std::runtime_error("Cannot parse config file (json format). " + errs);
+
+  auto layers = root["layers"];
+  std::vector<std::shared_ptr<LayerParam>> p;
+  for (auto layer : layers)
+  {
+    auto l = std::make_shared<LayerParam>();
+    {
+      l->name = layer["name"].asString();
+      l->dtype = layer["dtype"].asString();
+      l->granularity = layer["granularity"].asString();
+    }
+    p.emplace_back(l);
+  }
+
+  return p;
+}
 
 void print_exclusive_options(void)
 {
@@ -56,15 +90,20 @@ int entry(int argc, char **argv)
 {
   // Simple argument parser (based on map)
   std::map<std::string, OptionHook> argparse;
-  luci::CircleOptimizer optimizer;
+  luci::CircleQuantizer quantizer;
 
-  auto options = optimizer.options();
+  auto options = quantizer.options();
   auto settings = luci::UserSettings::settings();
 
   const std::string qdqw = "--quantize_dequantize_weights";
   const std::string qwmm = "--quantize_with_minmax";
   const std::string rq = "--requantize";
   const std::string fq = "--force_quantparam";
+  const std::string cq = "--copy_quantparam";
+  const std::string fake_quant = "--fake_quantize";
+  const std::string cfg = "--config";
+
+  const std::string tf_maxpool = "--TF-style_maxpool";
 
   const std::string gpd = "--generate_profile_data";
 
@@ -99,6 +138,19 @@ int entry(int argc, char **argv)
           "Three arguments required: input_model_dtype(float32) "
           "output_model_dtype(uint8) granularity(layer, channel)");
 
+  arser.add_argument(tf_maxpool)
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("Force MaxPool Op to have the same input/output quantparams. NOTE: This feature can "
+          "degrade accuracy of some models");
+
+  arser.add_argument(fake_quant)
+    .nargs(0)
+    .required(false)
+    .help("Convert a quantized model to a fake-quantized model. NOTE: This feature will "
+          "generate an fp32 model.");
+
   arser.add_argument(rq)
     .nargs(2)
     .type(arser::DataType::STR_VEC)
@@ -116,6 +168,15 @@ int entry(int argc, char **argv)
           "Three arguments required: tensor_name(string), "
           "scale(float) zero_point(int)");
 
+  arser.add_argument(cq)
+    .nargs(2)
+    .type(arser::DataType::STR_VEC)
+    .required(false)
+    .accumulated(true)
+    .help("Copy quantization parameter from a tensor to another tensor."
+          "Two arguments required: source_tensor_name(string), "
+          "destination_tensor_name(string)");
+
   arser.add_argument("--input_type")
     .nargs(1)
     .type(arser::DataType::STR)
@@ -128,6 +189,12 @@ int entry(int argc, char **argv)
     .required(false)
     .help("Output type of quantized model (uint8 or int16)");
 
+  arser.add_argument(cfg)
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .required(false)
+    .help("Path to the quantization configuration file");
+
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
 
@@ -146,11 +213,13 @@ int entry(int argc, char **argv)
   }
 
   {
-    // only one of qdqw, qwmm, rq, fq option can be used
+    // only one of qdqw, qwmm, rq, fq, cq, fake_quant option can be used
     int32_t opt_used = arser[qdqw] ? 1 : 0;
     opt_used += arser[qwmm] ? 1 : 0;
     opt_used += arser[rq] ? 1 : 0;
     opt_used += arser[fq] ? 1 : 0;
+    opt_used += arser[cq] ? 1 : 0;
+    opt_used += arser[fake_quant] ? 1 : 0;
     if (opt_used != 1)
     {
       print_exclusive_options();
@@ -178,6 +247,22 @@ int entry(int argc, char **argv)
     options->param(AlgorithmParameters::Quantize_input_model_dtype, values.at(0));
     options->param(AlgorithmParameters::Quantize_output_model_dtype, values.at(1));
     options->param(AlgorithmParameters::Quantize_granularity, values.at(2));
+
+    if (arser[cfg])
+    {
+      auto filename = arser.get<std::string>(cfg);
+      try
+      {
+        auto layer_params = read_layer_params(filename);
+
+        options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+      }
+      catch (const std::runtime_error &e)
+      {
+        std::cerr << e.what() << '\n';
+        return 255;
+      }
+    }
   }
 
   if (arser[qwmm])
@@ -201,6 +286,25 @@ int entry(int argc, char **argv)
     if (arser["--output_type"])
       options->param(AlgorithmParameters::Quantize_output_type,
                      arser.get<std::string>("--output_type"));
+
+    if (arser[tf_maxpool] and arser.get<bool>(tf_maxpool))
+      options->param(AlgorithmParameters::Quantize_TF_style_maxpool, "True");
+
+    if (arser[cfg])
+    {
+      auto filename = arser.get<std::string>(cfg);
+      try
+      {
+        auto layer_params = read_layer_params(filename);
+
+        options->layer_params(AlgorithmParameters::Quantize_layer_params, layer_params);
+      }
+      catch (const std::runtime_error &e)
+      {
+        std::cerr << e.what() << '\n';
+        return 255;
+      }
+    }
   }
 
   if (arser[rq])
@@ -245,6 +349,34 @@ int entry(int argc, char **argv)
     options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
   }
 
+  if (arser[cq])
+  {
+    auto values = arser.get<std::vector<std::vector<std::string>>>(cq);
+
+    std::vector<std::string> src;
+    std::vector<std::string> dst;
+
+    for (auto const value : values)
+    {
+      if (value.size() != 2)
+      {
+        std::cerr << arser;
+        return 255;
+      }
+
+      src.push_back(value[0]);
+      dst.push_back(value[1]);
+    }
+
+    options->enable(Algorithms::CopyQuantParam);
+
+    options->params(AlgorithmParameters::Quantize_src_tensor_names, src);
+    options->params(AlgorithmParameters::Quantize_dst_tensor_names, dst);
+  }
+
+  if (arser[fake_quant])
+    options->enable(Algorithms::ConvertToFakeQuantizedModel);
+
   std::string input_path = arser.get<std::string>("input");
   std::string output_path = arser.get<std::string>("output");
 
@@ -279,7 +411,7 @@ int entry(int argc, char **argv)
     auto graph = module->graph(idx);
 
     // quantize the graph
-    optimizer.quantize(graph);
+    quantizer.quantize(graph);
 
     if (!luci::validate(graph))
     {
index 4524260c4f7211ba835fa10132744a8945bf607a..676aecd53304f3ab3dac41c66417aa3564c4407a 100644 (file)
@@ -1,6 +1,6 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
   return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
 
 nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
 
@@ -19,7 +19,8 @@ target_include_directories(circle-tensordump PRIVATE ${HDF5_INCLUDE_DIRS})
 target_link_libraries(circle-tensordump PRIVATE ${HDF5_CXX_LIBRARIES})
 target_link_libraries(circle-tensordump PRIVATE arser)
 target_link_libraries(circle-tensordump PRIVATE foder)
-target_link_libraries(circle-tensordump PRIVATE mio_circle)
+target_link_libraries(circle-tensordump PRIVATE mio_circle04)
+target_link_libraries(circle-tensordump PRIVATE mio_circle04_helper)
 target_link_libraries(circle-tensordump PRIVATE safemain)
 
 install(TARGETS circle-tensordump DESTINATION bin)
index 1c754f5183d66088dedde50b0528a4490ac3b36c..183dfe2277106344e5a68f0f38af27f1a4e48909 100644 (file)
@@ -1,4 +1,4 @@
 require("arser")
 require("foder")
-require("mio-circle")
+require("mio-circle04")
 require("safemain")
index 429736bfec19848f136b2b7a51afffa8e7be2096..47b87605481b09c90382117c9ef390a0cbed5aa1 100644 (file)
 
 #include "Reader.h"
 
+#include <mio_circle/Helper.h>
+
 #include <sstream>
 #include <string>
 
 namespace circletensordump
 {
 
-bool is_valid(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
-  assert(opcode);
-
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    if (!opcode->custom_code())
-      return "(invalid custom)";
-
-    std::string custom_op = "CUSTOM(";
-    custom_op += opcode->custom_code()->c_str();
-    custom_op += ")";
-    return custom_op;
-  }
-
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
-  return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
-  static const char *kEmptyTensorName = "(noname)";
-
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-
-  return kEmptyTensorName;
-}
-
 Reader::Reader(const circle::Model *model)
 {
   _subgraphs = model->subgraphs();
@@ -122,7 +70,7 @@ circle::BuiltinOperator Reader::builtin_code(const circle::Operator *op) const
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  return opcode->builtin_code();
+  return mio::circle::builtin_code_neutral(opcode);
 }
 
 std::string Reader::opcode_name(const circle::Operator *op) const
@@ -131,14 +79,14 @@ std::string Reader::opcode_name(const circle::Operator *op) const
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::circle::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  return circletensordump::opcode_name(opcode);
+  return mio::circle::opcode_name(opcode);
 }
 
 bool Reader::select_subgraph(uint32_t sgindex)
index bbb039552c30c6cea8dfbfe3e2fdbf29452465b9..c868bc2773a209060d72cbf8a26b4957cf27685f 100644 (file)
@@ -36,12 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
 /**
  * @brief Loads Circle file and provides helpers to access attributes
  */
index f2217486517b23b0a7cbf7f7fbfa9bb6e72eff38..5d0eb94680aa9bd525e826e5c9d13ea0e89c6195 100644 (file)
@@ -1,13 +1,14 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
+  message(STATUS "Skip circle-verify: mio_circle04 not found")
   return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
 
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
 add_executable(circle-verify ${SOURCES})
 target_include_directories(circle-verify PRIVATE src)
 target_link_libraries(circle-verify arser)
-target_link_libraries(circle-verify mio_circle)
+target_link_libraries(circle-verify mio_circle04)
 target_link_libraries(circle-verify safemain)
 target_link_libraries(circle-verify cwrap)
 target_link_libraries(circle-verify foder)
index e1b7fb212922a80054ced0c46ae42c9d58029595..74c8f448b947c65ecd17a68650e047d1e9e88d1e 100644 (file)
@@ -1,5 +1,5 @@
 require("arser")
-require("mio-circle")
+require("mio-circle04")
 require("safemain")
 require("cwrap")
 require("foder")
index ee73d63e3df6dfc320fc0428c7f20a6c4907c47d..9ccfd000880efb00036a576364071b6d0942b11f 100644 (file)
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 nnas_include(TargetRequire)
 
 unset(REQUIRED_TARGETS)
index 358fc4e2c243d939da01a7d18e054da9fdfd77b6..cd79967b7295ffb6d06eb212f4daa8307d6b1aec 100644 (file)
@@ -11,7 +11,6 @@ target_link_libraries(circle2circle oops)
 target_link_libraries(circle2circle hermes)
 target_link_libraries(circle2circle hermes_std)
 target_link_libraries(circle2circle loco)
-target_link_libraries(circle2circle mio_circle)
 target_link_libraries(circle2circle luci_env)
 target_link_libraries(circle2circle luci_import)
 target_link_libraries(circle2circle luci_service)
@@ -36,7 +35,6 @@ target_link_libraries(circle2circle_test oops)
 target_link_libraries(circle2circle_test hermes)
 target_link_libraries(circle2circle_test hermes_std)
 target_link_libraries(circle2circle_test loco)
-target_link_libraries(circle2circle_test mio_circle)
 target_link_libraries(circle2circle_test luci_env)
 target_link_libraries(circle2circle_test luci_import)
 target_link_libraries(circle2circle_test luci_service)
index 36a9efd168926c3f53026d832558369a503070cd..b6c61198f126ed1fb5709ea96471e60ac3c851f6 100644 (file)
@@ -3,7 +3,6 @@ require("loco")
 require("locop")
 require("logo-core")
 require("safemain")
-require("mio-circle")
 require("oops")
 require("hermes")
 require("hermes-std")
index a5ddb26dc954eebdede6fa3be0c08d2d6176fd58..ae677a321b0db3581b3a85387d46dd038e62412f 100644 (file)
@@ -104,6 +104,12 @@ int entry(int argc, char **argv)
     .default_value(false)
     .help("This will fold Depthwise Convolution operator with constant inputs");
 
+  arser.add_argument("--fold_gather")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("This will fold Gather operator");
+
   arser.add_argument("--fold_sparse_to_dense")
     .nargs(0)
     .required(false)
@@ -203,6 +209,12 @@ int entry(int argc, char **argv)
     .default_value(false)
     .help("This will remove Quantize-Dequantize sequence");
 
+  arser.add_argument("--remove_redundant_quantize")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("This will remove redundant Quantize operators");
+
   arser.add_argument("--remove_redundant_reshape")
     .nargs(0)
     .required(false)
@@ -452,6 +464,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FoldDequantize);
   if (arser.get<bool>("--fold_dwconv"))
     options->enable(Algorithms::FoldDepthwiseConv2D);
+  if (arser.get<bool>("--fold_gather"))
+    options->enable(Algorithms::FoldGather);
   if (arser.get<bool>("--fold_sparse_to_dense"))
     options->enable(Algorithms::FoldSparseToDense);
   if (arser.get<bool>("--forward_reshape_to_unaryop"))
@@ -484,6 +498,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::RemoveFakeQuant);
   if (arser.get<bool>("--remove_quantdequant"))
     options->enable(Algorithms::RemoveQuantDequantSeq);
+  if (arser.get<bool>("--remove_redundant_quantize"))
+    options->enable(Algorithms::RemoveRedundantQuantize);
   if (arser.get<bool>("--remove_redundant_reshape"))
     options->enable(Algorithms::RemoveRedundantReshape);
   if (arser.get<bool>("--remove_redundant_transpose"))
index 3e2ddcbb3a68117e2aaa1d199e2055b65ebfdf07..b124d302734a61218d5dc0e3b767355cd3bb7f9b 100644 (file)
@@ -1,12 +1,14 @@
 nnas_find_package(Protobuf QUIET)
 
 if(NOT Protobuf_FOUND)
+  message(STATUS "circlechef: SKIP (missing Protobuf)")
   return()
 endif(NOT Protobuf_FOUND)
 
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
+  message(STATUS "circlechef: SKIP (missing mio-circle04)")
   return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
 
 # Recipe Parser
 add_subdirectory(proto)
index 98a284c306b9047da04e10f18f912cd525a999a2..12dc7217bdf3843c259a56e072d7a419d46a40f4 100644 (file)
@@ -4,6 +4,7 @@ add_library(circlechef_circle STATIC ${SOURCES})
 target_include_directories(circlechef_circle PUBLIC include)
 target_include_directories(circlechef_circle PRIVATE src)
 target_link_libraries(circlechef_circle circlechef_proto)
-target_link_libraries(circlechef_circle mio_circle)
+target_link_libraries(circlechef_circle mio_circle04)
+target_link_libraries(circlechef_circle mio_circle04_helper)
 target_link_libraries(circlechef_circle cwrap)
 target_link_libraries(circlechef_circle souschef)
index e970fbce3cd0c7ec16c88e9516c1ab58815b2241..f8756ef9484bdce80ec97c4ec170e67f159d4aeb 100644 (file)
 
 #include "Convert.h"
 
+#include <mio_circle/Helper.h>
+
 #include <sstream>
 
 namespace circlechef
 {
 
-const char *kEmptyTensorName = "(noname)";
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
-  return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-  return kEmptyTensorName;
-}
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (code == circle::BuiltinOperator_CUSTOM);
-}
-
 CircleImport::CircleImport(const circle::Model *model)
 {
   _subgraphs = model->subgraphs();
@@ -92,7 +67,7 @@ circle::BuiltinOperator CircleImport::builtin_code(const circle::Operator *op) c
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  return opcode->builtin_code();
+  return mio::circle::builtin_code_neutral(opcode);
 }
 
 std::string CircleImport::opcode_name(const circle::Operator *op) const
@@ -101,14 +76,14 @@ std::string CircleImport::opcode_name(const circle::Operator *op) const
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::circle::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  if (is_custom(opcode))
+  if (mio::circle::is_custom(opcode))
   {
     if (!opcode->custom_code())
       return "(invalid custom)";
index 23ca29beb0fed3c45eb38af13a6f75ebfc49fa08..9c1d161b69da2e60ddaa2b60a57bad715b3c3e30 100644 (file)
@@ -34,11 +34,6 @@ using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>
 using CircleBuffers_t = flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>;
 using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
 
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
index cd520cbc36010a01e9f5ea94397b316eeaf3c641..e21bca8a680bd756d08f445c93129e6d0752326f 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <circlechef/RecipeChef.h>
+#include <mio_circle/Helper.h>
 
 #include "Convert.h"
 #include "CircleImport.h"
@@ -42,7 +43,7 @@ void set_inputs(CircleImport *import, circlechef::Operation *operation, const ci
     else
     {
       auto tensor = tensors->Get(input);
-      std::string name = tensor_name(tensor);
+      std::string name = mio::circle::tensor_name(tensor);
       operation->add_input(name);
     }
   }
@@ -56,7 +57,7 @@ void set_outputs(CircleImport *import, circlechef::Operation *operation, const c
   for (auto output : outputs)
   {
     auto tensor = tensors->Get(output);
-    std::string name = tensor_name(tensor);
+    std::string name = mio::circle::tensor_name(tensor);
     operation->add_output(name);
   }
 }
@@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
 
     ::circlechef::Operand *operand = model_recipe->add_operand();
 
-    operand->set_name(tensor_name(tensor));
+    operand->set_name(mio::circle::tensor_name(tensor));
     operand->set_type(as_circlechef_type(tensor->type()));
 
     std::vector<int32_t> dims = as_index_vector(tensor->shape());
@@ -224,14 +225,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const circle::Model *model)
   for (const auto input : inputs)
   {
     auto tensor = tensors->Get(input);
-    std::string name = tensor_name(tensor);
+    std::string name = mio::circle::tensor_name(tensor);
 
     model_recipe->add_input(name);
   }
   for (const auto output : outputs)
   {
     auto tensor = tensors->Get(output);
-    std::string name = tensor_name(tensor);
+    std::string name = mio::circle::tensor_name(tensor);
 
     model_recipe->add_output(name);
   }
index 0e8f47483292259d0e34019e54c4440e2e6ced5d..415954767fdb3fe542e6cc74a6877ffb248e1c8b 100644 (file)
@@ -7,7 +7,7 @@ target_include_directories(circlechef_core PUBLIC include)
 target_include_directories(circlechef_core PRIVATE src)
 target_link_libraries(circlechef_core PUBLIC circlechef_proto)
 target_link_libraries(circlechef_core PUBLIC circlechef_log)
-target_link_libraries(circlechef_core PUBLIC mio_circle)
+target_link_libraries(circlechef_core PUBLIC mio_circle04)
 target_link_libraries(circlechef_core PUBLIC souschef)
 target_link_libraries(circlechef_core PRIVATE nncc_coverage)
 
index 6975f42a341b074f2dd46955c69a26b207a5f952..6c5206dfc3d6fa55de3f9c46a03e09e22b7d245c 100644 (file)
@@ -520,6 +520,10 @@ GeneratedModel cook(const ::circlechef::ModelRecipe &model_recipe)
   for (auto const &opcode : builtin_code_map)
   {
     circle::OperatorCodeBuilder code_builder{*flatbuffer_builder};
+    int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+    if (opcode.first < circle::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+      dep_code = static_cast<int8_t>(opcode.first);
+    code_builder.add_deprecated_builtin_code(dep_code);
     code_builder.add_builtin_code(opcode.first);
     code_builder.add_version(opcode.second);
     auto code = code_builder.Finish();
index 2106146d790213c1b4d47f0d81045cdb3bac2280..a5d6bedaa58cbeac821525c2d2f9f8eb7b5e6432 100644 (file)
@@ -1,9 +1,10 @@
 require("arser")
 require("nnkit")
 require("cwrap")
-require("mio-circle")
+require("mio-circle04")
 require("safemain")
 require("hermes")
 require("hermes-std")
 require("foder")
 require("souschef")
+require("circle-verify")
index 773ff54031d71089ab3ee4e9fe3e82d6af67a7d3..7ae619f8b4aa736782b3605e44dc79be965fff19 100644 (file)
@@ -3,6 +3,15 @@ set(CIRCLERECIPES_DIR "${CircleRecipes_DIR}")
 
 file(GLOB RECIPES RELATIVE ${CIRCLERECIPES_DIR} "${CIRCLERECIPES_DIR}/*/test.recipe")
 
+set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>)
+set(CIRCLECHEF_REVERSE_PATH $<TARGET_FILE:circlechef-reverse>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  # TODO use better way to represent path for host executable
+  set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file)
+  set(CIRCLECHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/reverse/circlechef-reverse)
+  message(STATUS "CIRCLECHEF_FILE_PATH = ${CIRCLECHEF_FILE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+
 foreach(RECIPE IN ITEMS ${RECIPES})
   get_filename_component(RECIPE_PREFIX ${RECIPE} DIRECTORY)
 
@@ -18,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   # Generate .circle
   add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
-                     COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
-                     DEPENDS circlechef-file ${RECIPE_SOURCE_FILE}
+                     COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+                     DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
                      COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
 
   list(APPEND TESTS ${RECIPE_PREFIX})
@@ -44,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   # Generate .circle
   add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
-                     COMMAND circlechef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
-                     DEPENDS circlechef-file ${RECIPE_SOURCE_FILE}
+                     COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+                     DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
                      COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
 
   list(APPEND TESTS ${RECIPE_PREFIX})
@@ -68,16 +77,16 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
 
   # Generate .gen.recipe from generated .circle
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
-                     COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
-                     DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE}
+                     COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+                     DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
 
   # now we are going to generate .gen.circle from .gen.recipe
   # to check generated .gen.recipe file is correct by using it.
   # as weight values may be different, binary comparision is not acceptable.
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
-                     COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
-                     DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE}
+                     COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+                     DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
 
   list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
@@ -96,13 +105,13 @@ foreach(CIRCLEFILE IN ITEMS ${GEN_CIRCLEFILES})
 
   # Generate .gen.recipe from generated .circle
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
-                     COMMAND circlechef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
-                     DEPENDS circlechef-reverse ${RECIPE_OUTPUT_FILE}
+                     COMMAND ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+                     DEPENDS ${CIRCLECHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
 
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
-                     COMMAND circlechef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
-                     DEPENDS circlechef-file ${RECIPE_GEN_OUTPUT_FILE}
+                     COMMAND ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+                     DEPENDS ${CIRCLECHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
 
   list(APPEND TESTS ${CIRCLE_PREFIX}.gen)
index 7848ac722c0724a698feb348e7662e008cc6b87c..b65c06677251c594ad28d622a5bb46652013b155 100644 (file)
@@ -1,6 +1,7 @@
-if(NOT TARGET mio_circle)
+if(NOT TARGET mio_circle04)
+  message(STATUS "Skip circledump: mio_circle04 not found")
   return()
-endif(NOT TARGET mio_circle)
+endif(NOT TARGET mio_circle04)
 
 set(DRIVER "driver/Driver.cpp")
 
@@ -9,8 +10,8 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 add_executable(circledump ${DRIVER} ${SOURCES})
 target_include_directories(circledump PRIVATE include)
 target_link_libraries(circledump arser)
-target_link_libraries(circledump mio_circle)
+target_link_libraries(circledump mio_circle04)
+target_link_libraries(circledump mio_circle04_helper)
 target_link_libraries(circledump safemain)
-target_link_libraries(circledump flatbuffers-1.10)
 
 install(TARGETS circledump DESTINATION bin)
index e31c2d560c03567f77d77791f6aff0b18bcb1821..d2baf26b3750bd16949e90da346e4efadc55f70d 100644 (file)
@@ -65,6 +65,6 @@ O T(3) ofm
 
 ### Dependency
 
-- mio-circle
+- mio-circle04
 - safemain
 - FlatBuffers
index 81e0f0dbdba66eb34caf0fdbd0fb8b7bf3d46b40..362d67cf4a08563dc939bcd12f64249878d23723 100644 (file)
@@ -1,3 +1,3 @@
 require("arser")
-require("mio-circle")
+require("mio-circle04")
 require("safemain")
index 42b4ad97a385ec86d17afd0a6466c5389a5fb81a..0b256dda8ef29bce582d9302fb689544f0da21e3 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <circledump/Dump.h>
+#include <mio_circle/Helper.h>
 
 #include "Read.h"
 #include "OpPrinter.h"
@@ -141,7 +142,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
 
   // dump operands(tensors)
   os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) "
-     << "B(buffer index) OperandName" << std::endl;
+     << "B(buffer index) (variable) OperandName" << std::endl;
   for (uint32_t i = 0; i < tensors->Length(); ++i)
   {
     // TODO refactor to some better structure
@@ -151,7 +152,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
     if (tensor->shape())
       dims = circleread::as_index_vector(tensor->shape());
 
-    os << "T(" << reader.subgraph_index() << ":" << i << ") " << circleread::tensor_type(tensor)
+    os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::circle::tensor_type(tensor)
        << " ";
     os << "(" << dims << ") ";
     if (tensor->shape_signature())
@@ -160,7 +161,11 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
       os << "(" << dims_sig << ") ";
     }
     os << "B(" << tensor->buffer() << ") ";
-    os << circleread::tensor_name(tensor) << std::endl;
+    if (tensor->is_variable())
+    {
+      os << "(variable) ";
+    }
+    os << mio::circle::tensor_name(tensor) << std::endl;
 
     if (auto q_params = tensor->quantization())
     {
@@ -312,7 +317,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
       if (input >= 0)
       {
         auto tensor = tensors->Get(input);
-        os << circleread::tensor_name(tensor);
+        os << mio::circle::tensor_name(tensor);
       }
       os << std::endl;
     }
@@ -322,7 +327,7 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
       if (output >= 0)
       {
         auto tensor = tensors->Get(output);
-        os << circleread::tensor_name(tensor);
+        os << mio::circle::tensor_name(tensor);
       }
       os << std::endl;
     }
@@ -335,14 +340,14 @@ void dump_sub_graph(std::ostream &os, circleread::Reader &reader)
   for (const auto input : reader.inputs())
   {
     auto tensor = tensors->Get(input);
-    std::string name = circleread::tensor_name(tensor);
+    std::string name = mio::circle::tensor_name(tensor);
     os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl;
   }
 
   for (const auto output : reader.outputs())
   {
     auto tensor = tensors->Get(output);
-    std::string name = circleread::tensor_name(tensor);
+    std::string name = mio::circle::tensor_name(tensor);
     os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl;
   }
 
@@ -364,6 +369,7 @@ void dump_model(std::ostream &os, const circle::Model *model)
   auto opcodes = reader.opcodes();
   auto buffers = reader.buffers();
   auto metadata = reader.metadata();
+  auto signaturedefs = reader.signature_defs();
 
   // dump operator_codes
   os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
@@ -371,11 +377,14 @@ void dump_model(std::ostream &os, const circle::Model *model)
   for (auto opcode : opcodes)
   {
     circle::BuiltinOperator op_code = opcode->builtin_code();
-    auto op_name = circleread::opcode_name(opcode);
+    // cast to int32_t to print as number or int8_t will print as ascii code
+    int32_t dp_code = static_cast<int32_t>(opcode->deprecated_builtin_code());
+
+    auto op_name = mio::circle::opcode_name(opcode);
     auto op_version = opcode->version();
 
     os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
-       << ", version: " << op_version << ")" << std::endl;
+       << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl;
 
     opcode_index++;
   }
@@ -417,6 +426,37 @@ void dump_model(std::ostream &os, const circle::Model *model)
     os << std::endl;
   }
 
+  // dump signaturedef
+  if (signaturedefs != nullptr)
+  {
+    os << "SignatureDef" << std::endl;
+    for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
+    {
+      auto sign_i = signaturedefs->Get(i);
+      os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph("
+         << sign_i->subgraph_index() << ")" << std::endl;
+
+      auto inputs_i = sign_i->inputs();
+      for (uint32_t t = 0; t < inputs_i->Length(); ++t)
+      {
+        auto inputs_i_t = inputs_i->Get(t);
+        os << "    I(" << t << ")"
+           << " T(" << sign_i->subgraph_index() << ":" << inputs_i_t->tensor_index() << ") "
+           << inputs_i_t->name()->c_str() << std::endl;
+      }
+
+      auto outputs_i = sign_i->outputs();
+      for (uint32_t t = 0; t < outputs_i->Length(); ++t)
+      {
+        auto outputs_i_t = outputs_i->Get(t);
+        os << "    O(" << t << ")"
+           << " T(" << sign_i->subgraph_index() << ":" << outputs_i_t->tensor_index() << ") "
+           << outputs_i_t->name()->c_str() << std::endl;
+      }
+    }
+    os << std::endl;
+  }
+
   for (uint32_t sg = 0; sg < num_subgraph; ++sg)
   {
     reader.select_subgraph(sg);
index ec91ed189998fe17e8ca4d192dc5896ce1857b3c..67e7fa5a63e79d86d9a4ba0e04effdb991b80198 100644 (file)
@@ -76,7 +76,7 @@ public:
   {
     if (_value != -1)
     {
-      // Close on descturction
+      // Close on destructor
       close(_value);
     }
   }
index 7af3ff6412d3ddc9b14cd95cabc960845ccd7213..02e5c26b584d563328195a1962134917e77992a0 100644 (file)
@@ -341,6 +341,7 @@ public:
          << ") ";
       os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
          << ") ";
+      os << "keep_num_dims(" << params->keep_num_dims() << ") ";
 
       os << std::endl;
     }
@@ -619,6 +620,23 @@ public:
   }
 };
 
+class SVDFPrinter : public OpPrinter
+{
+public:
+  void options(const circle::Operator *op, std::ostream &os) const override
+  {
+    if (auto *params = op->builtin_options_as_SVDFOptions())
+    {
+      os << "    ";
+      os << "rank(" << params->rank() << ") ";
+      os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+         << ") ";
+      os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+      os << std::endl;
+    }
+  }
+};
+
 class TransposeConvPrinter : public OpPrinter
 {
 public:
@@ -754,6 +772,22 @@ public:
   }
 };
 
+class InstanceNormPrinter : public OpPrinter
+{
+public:
+  void options(const circle::Operator *op, std::ostream &os) const override
+  {
+    if (auto *params = op->builtin_options_as_InstanceNormOptions())
+    {
+      os << "    ";
+      os << "epsilon(" << params->epsilon() << ") ";
+      os << "Activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+         << ") ";
+      os << std::endl;
+    }
+  }
+};
+
 OpPrinterRegistry::OpPrinterRegistry()
 {
   _op_map[circle::BuiltinOperator_ADD] = make_unique<AddPrinter>();
@@ -824,6 +858,7 @@ OpPrinterRegistry::OpPrinterRegistry()
   _op_map[circle::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>();
   _op_map[circle::BuiltinOperator_SUB] = make_unique<SubPrinter>();
   _op_map[circle::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
+  _op_map[circle::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>();
   _op_map[circle::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
   // There is no Option for TOPK_V2
   _op_map[circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
@@ -835,6 +870,7 @@ OpPrinterRegistry::OpPrinterRegistry()
   // Circle only
   _op_map[circle::BuiltinOperator_BCQ_FULLY_CONNECTED] = make_unique<BCQFullyConnectedPrinter>();
   _op_map[circle::BuiltinOperator_BCQ_GATHER] = make_unique<BCQGatherPrinter>();
+  _op_map[circle::BuiltinOperator_INSTANCE_NORM] = make_unique<InstanceNormPrinter>();
 }
 
 } // namespace circledump
index db8298585b626588957ed99abbd3864605f454ea..3a7e98cdee6b5ded3fd80dc59bba763bd582456c 100644 (file)
 
 #include "Read.h"
 
+#include <mio_circle/Helper.h>
+
 #include <sstream>
 #include <string>
 
 namespace circleread
 {
 
-bool is_valid(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
-  assert(opcode);
-
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    if (!opcode->custom_code())
-      return "(invalid custom)";
-
-    std::string custom_op = "CUSTOM(";
-    custom_op += opcode->custom_code()->c_str();
-    custom_op += ")";
-    return custom_op;
-  }
-
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const circle::Tensor *tensor)
-{
-  return circle::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const circle::Tensor *tensor)
-{
-  static const char *kEmptyTensorName = "(noname)";
-
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-
-  return kEmptyTensorName;
-}
-
 Reader::Reader(const circle::Model *model)
 {
   _version = model->version();
   _subgraphs = model->subgraphs();
   _buffers = model->buffers();
   _metadata = model->metadata();
+  _signature_defs = model->signature_defs();
 
   auto opcodes = model->operator_codes();
   for (const ::circle::OperatorCode *opcode : *opcodes)
@@ -127,14 +76,14 @@ std::string Reader::opcode_name(const circle::Operator *op) const
   assert(index < _op_codes.size());
   const circle::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::circle::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  return circleread::opcode_name(opcode);
+  return mio::circle::opcode_name(opcode);
 }
 
 bool Reader::select_subgraph(uint32_t sgindex)
index c61a1ab6d49eeb28d3d0632634e3890f74226a64..05b0e507227e5dea95c074922ac60a3640135c3a 100644 (file)
@@ -41,12 +41,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
-bool is_valid(const circle::OperatorCode *opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-const char *tensor_type(const circle::Tensor *tensor);
-const char *tensor_name(const circle::Tensor *tensor);
-
 /**
  * @brief Loads Circle file and provides helpers to access attributes
  */
@@ -58,6 +52,7 @@ private:
   using CircleTensors_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
   using CircleOperators_t = flatbuffers::Vector<flatbuffers::Offset<circle::Operator>>;
   using CircleMetadata_t = flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>;
+  using CircleSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>;
 
 public:
   Reader(const circle::Model *model);
@@ -75,6 +70,7 @@ public:
   const std::vector<int32_t> &outputs() const { return _outputs; }
   const circle::DataFormat &data_format() const { return _data_format; }
   const CircleMetadata_t *metadata() const { return _metadata; }
+  const CircleSignatureDef_t *signature_defs() const { return _signature_defs; }
 
   uint32_t num_subgraph() const { return _subgraphs->Length(); }
 
@@ -95,6 +91,7 @@ private:
   const CircleTensors_t *_tensors{nullptr};
   const CircleOperators_t *_operators{nullptr};
   const CircleMetadata_t *_metadata{nullptr};
+  const CircleSignatureDef_t *_signature_defs{nullptr};
 
   uint32_t _subgraph_index = 0;
   std::string _subgraph_name;
index 2ab8c05292fda5e86d73ae9677a89372b22b608f..0fb99ddba64c15b887bcfb5dc586f1edece8297f 100644 (file)
@@ -4,11 +4,11 @@ list(APPEND TESTS "src/App.test.cpp")
 add_library(cli ${SOURCES})
 target_include_directories(cli PUBLIC include)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 GTest_AddTEst(cli_test ${TESTS})
 target_link_libraries(cli_test cli)
index 6de634a2509893c0925b333bea409c09c6454b14..404149c15e1263f4634b5d3975f101a64ccbaca3 100644 (file)
@@ -1,82 +1,63 @@
 #[[ Generate common python virtual enviornment ]]
-find_package(PythonInterp 3 QUIET)
-find_package(PythonLibs 3 QUIET)
+find_package(PythonInterp 3.8 QUIET)
+find_package(PythonLibs 3.8 QUIET)
 
 if(NOT ${PYTHONINTERP_FOUND})
   message(STATUS "Build common-artifacts: FALSE (Python3 is missing)")
   return()
 endif()
 
-if(${PYTHON_VERSION_MINOR} LESS 3)
-  message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.3)")
+if(${PYTHON_VERSION_MINOR} LESS 8)
+  message(STATUS "Build common-artifacts: FALSE (You need to install Python version higher than 3.8)")
   return()
 endif()
 
-# Create python virtual environment with tensorflow 1.13.2
-set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
-
-# Create python virtual environment with tensorflow 2.3.0
-set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
 # Create python virtual environment with tensorflow 2.6.0
 set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0")
 
-add_custom_command(
-  OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
-  COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_1_13_2}
-)
-
-add_custom_command(
-  OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
-  COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
-)
 add_custom_command(
   OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0}
   COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0}
 )
 
-# Create requirements.txt and install required pip packages
-set(REQUIREMENTS_FILE "requirements.txt")
-set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
-set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
+# Create python virtual environment with tensorflow 2.8.0
+set(VIRTUALENV_OVERLAY_TF_2_8_0 "${NNCC_OVERLAY_DIR}/venv_2_8_0")
 
-# TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
 add_custom_command(
-  OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
-  COMMAND ${CMAKE_COMMAND} -E echo "tensorflow==1.13.2" > ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
-  COMMAND ${VIRTUALENV_OVERLAY_TF_1_13_2}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} --upgrade
-  DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
+  OUTPUT ${VIRTUALENV_OVERLAY_TF_2_8_0}
+  COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_8_0}
 )
 
-add_custom_command(
-  OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
-  COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
-  COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.3.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
-  COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
-  COMMAND ${VIRTUALENV_OVERLAY_TF_2_3_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0} --upgrade
-  DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
-)
+# Create requirements.txt and install required pip packages
+set(REQUIREMENTS_FILE "requirements.txt")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_8_0 "${VIRTUALENV_OVERLAY_TF_2_8_0}/${REQUIREMENTS_FILE}")
 
 add_custom_command(
   OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
   COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
   COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
   COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
-  COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
-  COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
   DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
 )
 
+add_custom_command(
+  OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+  COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.8.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+  COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install --upgrade pip setuptools
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_8_0}/bin/python3.8 -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0} --upgrade
+  DEPENDS ${VIRTUALENV_OVERLAY_TF_2_8_0}
+)
+
 add_custom_target(common_artifacts_python_deps ALL
-  DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
-          ${VIRTUALENV_OVERLAY_TF_2_3_0}
-          ${VIRTUALENV_OVERLAY_TF_2_6_0}
-          ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
-          ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+  DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
+          ${VIRTUALENV_OVERLAY_TF_2_8_0}
           ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+          ${REQUIREMENTS_OVERLAY_PATH_TF_2_8_0}
 )
 
 #[[ Generate common resources ]]
@@ -97,7 +78,6 @@ target_link_libraries(testDataGenerator PRIVATE arser)
 target_link_libraries(testDataGenerator PRIVATE foder)
 target_link_libraries(testDataGenerator PRIVATE luci_import)
 target_link_libraries(testDataGenerator PRIVATE luci_interpreter)
-target_link_libraries(testDataGenerator PRIVATE mio_circle)
 target_link_libraries(testDataGenerator PRIVATE safemain)
 
 unset(TEST_DEPS)
@@ -109,6 +89,7 @@ set(TFLITE_RECIPE_REPO "${TensorFlowLiteRecipes_DIR}")
 set(CIRCLE_RECIPE_REPO "${CircleRecipes_DIR}")
 set(TEST_RECIPE_FILENAME "test.recipe")
 set(TEST_RULE_FILENAME "test.rule")
+set(TEST_QCONFIG_FILENAME "test.qconf.json")
 
 set(MODEL2NNPKG "${NNAS_PROJECT_SOURCE_DIR}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh")
 # Get test case list
@@ -140,12 +121,20 @@ endmacro()
 
 include("exclude.lst")
 
+# TODO revise using variadic arguments
+macro(tcgenerate_option NAME OPTION ARG1 ARG2 ARG3)
+  set(TCGEN_OPT_${NAME} ${OPTION} ${ARG1} ${ARG2} ${ARG3})
+endmacro()
+
+include("options.lst")
+
 foreach(RECIPE IN ITEMS ${RECIPES})
   unset(OPT_FORMAT)
   unset(MODEL_FORMAT)
 
   set(RECIPE_FILE "${RECIPE}.recipe")
   set(RULE_FILE "${RECIPE}.rule")
+  set(QCONFIG_FILE "${RECIPE}.qconf.json")
   set(TFLITE_RECIPE_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
   set(CIRCLE_RECIPE_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_RECIPE_FILENAME}")
 
@@ -174,8 +163,20 @@ foreach(RECIPE IN ITEMS ${RECIPES})
     set(RULE_SOURCE_PATH ${CIRCLE_RULE_SOURCE_PATH})
   endif()
 
+  set(TFLITE_QCONFIG_SOURCE_PATH "${TFLITE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}")
+  set(CIRCLE_QCONFIG_SOURCE_PATH "${CIRCLE_RECIPE_REPO}/${RECIPE}/${TEST_QCONFIG_FILENAME}")
+
+  unset(QCONFIG_SOURCE_PATH)
+  if(EXISTS "${TFLITE_QCONFIG_SOURCE_PATH}")
+    set(QCONFIG_SOURCE_PATH ${TFLITE_QCONFIG_SOURCE_PATH})
+  endif()
+  if(EXISTS "${CIRCLE_QCONFIG_SOURCE_PATH}")
+    set(QCONFIG_SOURCE_PATH ${CIRCLE_QCONFIG_SOURCE_PATH})
+  endif()
+
   set(RECIPE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RECIPE_FILE}")
   set(RULE_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${RULE_FILE}")
+  set(QCONFIG_BINARY_PATH "${CMAKE_CURRENT_BINARY_DIR}/${QCONFIG_FILE}")
 
   set(TFLITE_FILE "${RECIPE}.tflite")
   set(TFLITE_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${TFLITE_FILE}")
@@ -200,6 +201,16 @@ foreach(RECIPE IN ITEMS ${RECIPES})
     list(APPEND TEST_DEPS ${RULE_BINARY_PATH})
   endif()
 
+  if(DEFINED QCONFIG_SOURCE_PATH)
+    # Copy .qconf.json
+    add_custom_command(OUTPUT ${QCONFIG_BINARY_PATH}
+      COMMAND ${CMAKE_COMMAND} -E copy "${QCONFIG_SOURCE_PATH}" "${QCONFIG_BINARY_PATH}"
+      DEPENDS ${QCONFIG_SOURCE_PATH}
+      COMMENT "Generate ${QCONFIG_FILE}"
+    )
+    list(APPEND TEST_DEPS ${QCONFIG_BINARY_PATH})
+  endif()
+
   if(${MODEL_FORMAT} STREQUAL "tflite")
     # Generate .tflite
     add_custom_command(OUTPUT ${TFLITE_OUTPUT_PATH}
@@ -274,11 +285,21 @@ foreach(RECIPE IN ITEMS ${RECIPES})
     )
     list(APPEND TEST_DEPS ${TC_DIRECTORY})
 
+    # set ADDITIONAL_OPTIONS as empty (one space before closing is intentional)
+    set(ADDITIONAL_OPTIONS )
+    if(DEFINED TCGEN_OPT_${RECIPE})
+      set(ADDITIONAL_OPTIONS ${ADDITIONAL_OPTIONS} ${TCGEN_OPT_${RECIPE}})
+    endif()
+
     # Generate input.h5, expected.h5
     set(INPUT_HDF5_FILE "${TC_DIRECTORY}/input.h5")
     set(EXPECTED_HDF5_FILE "${TC_DIRECTORY}/expected.h5")
     add_custom_command(OUTPUT ${INPUT_HDF5_FILE} ${EXPECTED_HDF5_FILE}
-      COMMAND $<TARGET_FILE:testDataGenerator> --input_data ${INPUT_HDF5_FILE} --expected_data ${EXPECTED_HDF5_FILE} ${MODEL_FILE}
+      COMMAND $<TARGET_FILE:testDataGenerator>
+              --input_data ${INPUT_HDF5_FILE}
+              --expected_data ${EXPECTED_HDF5_FILE}
+              ${ADDITIONAL_OPTIONS}
+              ${MODEL_FILE}
       DEPENDS $<TARGET_FILE:testDataGenerator> ${MODEL_FILE} ${TC_DIRECTORY}
       COMMENT "Generate input.h5 and expected.h5 in ${NNPKG_FILE}/metadata/tc"
     )
index f32e004130d0177316d352f7c932f040ddcdb1e2..92b07fde8b6c2cedf1fd25abacce70adcce6ce2f 100644 (file)
@@ -14,7 +14,6 @@ optimize(UnidirectionalSequenceLSTM_001) # This recipe contains is_variable Tens
 tcgenerate(Abs_000)
 tcgenerate(AddN_000)
 tcgenerate(Add_001) # runtime doesn't support
-tcgenerate(Add_U8_000)
 tcgenerate(Add_STR_000) # STRING is not supported
 tcgenerate(Add_STR_001) # STRING is not supported
 tcgenerate(All_000)
@@ -26,32 +25,24 @@ tcgenerate(ArgMin_U8_000)
 tcgenerate(ArgMin_U8_001)
 tcgenerate(ArgMin_U8_002)
 tcgenerate(ArgMin_U8_003)
-tcgenerate(BatchMatMul_000)
 tcgenerate(BatchMatMulV2_000)
 tcgenerate(BatchMatMulV2_001)
 tcgenerate(BatchToSpaceND_000)
 tcgenerate(BroadcastTo_000) # luci-interpreter doesn't support custom operator
-tcgenerate(Cast_000)
-tcgenerate(Cast_001)
 tcgenerate(Ceil_000)
 tcgenerate(Conv2D_003) # runtime doesn't support dilation
 tcgenerate(Cos_000)
 tcgenerate(DepthwiseConv2D_001) # runtime doesn't support dilation
 tcgenerate(DepthwiseConv2D_003) # runtime doesn't support dilation
 tcgenerate(DepthwiseConv2D_U8_001)  # luci-interpreter doesn't support channel-wise quantization yet
-tcgenerate(Dequantize_000)  # runtime and luci-interpreter doesn't support Dequantize op yet
-tcgenerate(ExpandDims_000)
-tcgenerate(ExpandDims_001)
-tcgenerate(ExpandDims_002)
-tcgenerate(ExpandDims_003)
-tcgenerate(ExpandDims_004)
+tcgenerate(ExpandDims_001) # luci-interpreter doesn't support undefined shape
+tcgenerate(ExpandDims_002) # luci-interpreter doesn't support undefined shape
 tcgenerate(FakeQuant_000) # runtime and luci-interpreter doesn't support yet
 tcgenerate(Fill_000)
 tcgenerate(Fill_001)
 tcgenerate(FloorMod_000)
 tcgenerate(FloorMod_001)
 tcgenerate(FullyConnected_U8_000)
-tcgenerate(Gather_000)
 tcgenerate(GatherNd_000)
 tcgenerate(GatherNd_001)
 tcgenerate(L2Pool2D_U8_000)
@@ -75,8 +66,8 @@ tcgenerate(Mul_U8_000)
 tcgenerate(Neg_000)
 tcgenerate(Net_BroadcastTo_AddV2_001) # luci-interpreter doesn't support custom operator
 tcgenerate(Net_Conv_FakeQuant_000) # luci-interpreter doesn't support FakeQuant yet
-tcgenerate(Net_Conv_QuantDequant_000) # luci-interpreter doesn't support Quantize/Dequantize yet
 tcgenerate(Net_Dangle_001)
+tcgenerate(Net_Gather_SparseToDense_AddV2_000) # luci-interpreter doesn't support custom operator
 tcgenerate(Net_ZeroDim_001) # luci-interpreter doesn't support zero dim
 tcgenerate(OneHot_000)
 tcgenerate(OneHot_001)
@@ -157,13 +148,11 @@ tcgenerate(While_001) # Needs luci-interpreter int32_t support for ADD, EQUAL
 tcgenerate(While_002) # Needs luci-interpreter int32_t support for ADD, EQUAL
 tcgenerate(While_003) # Needs luci-interpreter int32_t support for ADD, EQUAL, and dynamic shape for WHILE
 tcgenerate(YUV_TO_RGB_000)
-tcgenerate(YUV_TO_RGB_U8_000)
 tcgenerate(ZerosLike_000)
 
 ## CircleRecipes
 tcgenerate(BCQFullyConnected_000)
 tcgenerate(BCQFullyConnected_001)
 tcgenerate(BCQGather_000)
-tcgenerate(CircleBatchMatMul_000)
 tcgenerate(InstanceNorm_000)
 tcgenerate(InstanceNorm_001)
diff --git a/compiler/common-artifacts/options.lst b/compiler/common-artifacts/options.lst
new file mode 100644 (file)
index 0000000..5e0ff9d
--- /dev/null
@@ -0,0 +1,6 @@
+## Additional Options for test recipe
+
+#[[ tcgenerate_option : add additional option(s) for generation ]]
+
+# make valid 'indices' input value
+tcgenerate_option(Gather_001 --input_range indices 0 3)
index d7bed21fe2fd77f2da4cb8f20954c93565a7b082..cc07e17f629039aa3fef79c7dd2bb78e2c17dda5 100644 (file)
@@ -4,6 +4,6 @@ require("circlechef")
 require("foder")
 require("luci")
 require("luci-interpreter")
-require("mio-circle")
 require("safemain")
 require("tflchef")
+require("tflite2circle")
index b00e93e885537f274fc199e6d07301e302b354c7..33cecbbe2c71fa0e6e490601f959c6d405587442 100644 (file)
@@ -18,7 +18,6 @@
 #include <foder/FileLoader.h>
 #include <luci/Importer.h>
 #include <luci_interpreter/Interpreter.h>
-#include <mio/circle/schema_generated.h>
 
 #include <H5Cpp.h>
 
@@ -27,6 +26,9 @@
 #include <memory>
 #include <random>
 #include <string>
+#include <vector>
+#include <cassert>
+#include <cstdlib>
 
 namespace
 {
@@ -43,6 +45,8 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
   {
     case loco::DataType::U8:
       return H5::PredType::NATIVE_UINT8;
+    case loco::DataType::S16:
+      return H5::PredType::NATIVE_INT16;
     case loco::DataType::S32:
       return H5::PredType::NATIVE_INT32;
     case loco::DataType::S64:
@@ -56,7 +60,7 @@ H5::PredType hdf5_dtype_cast(const loco::DataType loco_dtype)
   }
 }
 
-template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, uint32_t size)
+template <typename T> void generate_random_data(std::mt19937 &gen, void *data, uint32_t size)
 {
   std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2)
   for (uint32_t i = 0; i < size; i++)
@@ -65,7 +69,7 @@ template <typename T> void geneate_random_data(std::mt19937 &gen, void *data, ui
   }
 }
 
-template <> void geneate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size)
+template <> void generate_random_data<bool>(std::mt19937 &gen, void *data, uint32_t size)
 {
   std::normal_distribution<float> distrib(0, 2); // mean(0), stddev(2)
   for (uint32_t i = 0; i < size; i++)
@@ -74,6 +78,20 @@ template <> void geneate_random_data<bool>(std::mt19937 &gen, void *data, uint32
   }
 }
 
+template <typename T>
+void generate_random_range(void *data, uint32_t size, int32_t range_min, int32_t range_max)
+{
+  assert(range_min <= range_max);
+
+  for (uint32_t i = 0; i < size; i++)
+  {
+    // +1 will make value of [range_min, range_max]
+    int32_t range = range_max - range_min + 1;
+    int32_t value = (rand() % range) + range_min;
+    static_cast<T *>(data)[i] = static_cast<T>(value);
+  }
+}
+
 void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t seed)
 {
   std::mt19937 gen(seed); // standard mersenne_twister_engine seeded with rd()
@@ -81,19 +99,38 @@ void fill_random_data(void *data, uint32_t size, loco::DataType dtype, uint32_t
   switch (dtype)
   {
     case loco::DataType::U8:
-      geneate_random_data<uint8_t>(gen, data, size);
+      generate_random_data<uint8_t>(gen, data, size);
+      break;
+    case loco::DataType::S16:
+      generate_random_data<int16_t>(gen, data, size);
       break;
     case loco::DataType::S32:
-      geneate_random_data<int32_t>(gen, data, size);
+      generate_random_data<int32_t>(gen, data, size);
       break;
     case loco::DataType::S64:
-      geneate_random_data<int64_t>(gen, data, size);
+      generate_random_data<int64_t>(gen, data, size);
       break;
     case loco::DataType::FLOAT32:
-      geneate_random_data<float>(gen, data, size);
+      generate_random_data<float>(gen, data, size);
       break;
     case loco::DataType::BOOL:
-      geneate_random_data<bool>(gen, data, size);
+      generate_random_data<bool>(gen, data, size);
+      break;
+    default:
+      throw std::runtime_error("NYI data type.");
+  }
+}
+
+void fill_random_range(void *data, uint32_t size, loco::DataType dtype, int32_t range_min,
+                       int32_t range_max)
+{
+  switch (dtype)
+  {
+    case loco::DataType::S32:
+      generate_random_range<int32_t>(data, size, range_min, range_max);
+      break;
+    case loco::DataType::S64:
+      generate_random_range<int64_t>(data, size, range_min, range_max);
       break;
     default:
       throw std::runtime_error("NYI data type.");
@@ -120,6 +157,11 @@ int entry(int argc, char **argv)
     .required(false)
     .nargs(0)
     .help("Put a fixed seed into the random number generator");
+  arser.add_argument("--input_range")
+    .required(false)
+    .nargs(3)
+    .type(arser::DataType::STR_VEC)
+    .help("Set random number range [min max] for the input as 'name min max'");
 
   try
   {
@@ -176,6 +218,24 @@ int entry(int argc, char **argv)
   std::unique_ptr<H5::Group> output_value_group =
     std::make_unique<H5::Group>(output_file.createGroup("value"));
 
+  std::string range_name;
+  int32_t range_min = 0;
+  int32_t range_max = 0;
+  bool range_check = false;
+  bool range_input_found = false;
+  if (arser["--input_range"])
+  {
+    // NOTE limitation: we can only set one input range
+    // TODO expand this for multiple inputs
+    std::vector<std::string> values = arser.get<std::vector<std::string>>("--input_range");
+    assert(values.size() == 3);
+    range_name = values.at(0);
+    // TODO add check for valid numbers
+    range_min = std::atoi(values.at(1).c_str());
+    range_max = std::atoi(values.at(2).c_str());
+    range_check = true;
+  }
+
   std::random_device rd; // used to obtain a seed for the random number engine
   uint32_t input_index = 0;
   // TODO remove indentation
@@ -187,6 +247,7 @@ int entry(int argc, char **argv)
     {
       const auto *input_node = dynamic_cast<const luci::CircleInput *>(node);
       std::string name = input_node->name();
+      assert(not name.empty());
       if (name.find(":") == std::string::npos)
         name += ":0";
 
@@ -217,7 +278,12 @@ int entry(int argc, char **argv)
       std::vector<int8_t> data(byte_size);
 
       // generate random data
-      if (arser["--fixed_seed"])
+      if (range_name == input_node->name())
+      {
+        fill_random_range(data.data(), data_size, input_node->dtype(), range_min, range_max);
+        range_input_found = true;
+      }
+      else if (arser["--fixed_seed"])
         fill_random_data(data.data(), data_size, input_node->dtype(), 0);
       else
         fill_random_data(data.data(), data_size, input_node->dtype(), rd());
@@ -230,6 +296,12 @@ int entry(int argc, char **argv)
     }
   }
 
+  if (range_check && not range_input_found)
+  {
+    std::cerr << "ERROR: input_range for input [" << range_name << "] not found." << std::endl;
+    return EXIT_FAILURE;
+  }
+
   interpreter.interpret();
 
   // dump output data into hdf5 file
diff --git a/compiler/dio-hdf5/CMakeLists.txt b/compiler/dio-hdf5/CMakeLists.txt
new file mode 100644 (file)
index 0000000..199c0d5
--- /dev/null
@@ -0,0 +1,30 @@
+nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
+
+if(NOT HDF5_FOUND)
+  message(STATUS "Build dio_hdf5: FAILED (missing HDF5)")
+  return()
+endif(NOT HDF5_FOUND)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(dio_hdf5 SHARED ${SOURCES})
+target_include_directories(dio_hdf5 PUBLIC include)
+target_include_directories(dio_hdf5 PUBLIC ${HDF5_INCLUDE_DIRS})
+target_link_libraries(dio_hdf5 PUBLIC ${HDF5_CXX_LIBRARIES})
+target_link_libraries(dio_hdf5 PUBLIC loco)
+
+install(TARGETS dio_hdf5 DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+        FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(dio_hdf5_test ${TESTS})
+target_include_directories(dio_hdf5_test PRIVATE include)
+target_link_libraries(dio_hdf5_test dio_hdf5)
diff --git a/compiler/dio-hdf5/README.md b/compiler/dio-hdf5/README.md
new file mode 100644 (file)
index 0000000..aa2398c
--- /dev/null
@@ -0,0 +1,29 @@
+# dio-hdf5
+
+_dio-hdf5_ is a library that helps load HDF5 files (_dio_ stands for data I/O).
+
+The hdf5 file should have the following structure.
+
+```
+Group "/"
+ > Group <group_name>
+   > Group <data_idx>
+     > Dataset <input_idx>
+```
+
+## Example
+
+```cpp
+dio_hdf5::HDF5Importer h5{input_path};
+
+h5.importGroup("value");
+
+// Prepare buffer
+const uint32_t input_byte_size = 16;
+std::vector<char> buffer(input_byte_size);
+
+// Write the first input of the first data to buffer
+h5.readTensor(0, 0, buffer.data());
+
+DO_SOMETHING_WITH(buffer);
+```
diff --git a/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h b/compiler/dio-hdf5/include/dio_hdf5/HDF5Importer.h
new file mode 100644 (file)
index 0000000..aafcfbb
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __DIO_HDF5_H__
+#define __DIO_HDF5_H__
+
+#include <H5Cpp.h>
+
+#include <loco.h>
+
+#include <string>
+#include <vector>
+
+namespace dio
+{
+namespace hdf5
+{
+
+// HDF5Importer reads input data saved in the HDF5 file at the given path
+// The hierarchy of the hdf5 file is as follows.
+// Group "/"
+//  > Group <group_name>
+//    > Group <data_idx>
+//      > Dataset <input_idx>
+// data_idx : index of the data (dataset file can contain multiple data)
+// input_idx : index of the input (DNN model can have multiple inputs)
+// Ex: the j'th input of the i'th data of group 'value' can be accessed by "/value/i/j"
+class HDF5Importer final
+{
+public:
+  explicit HDF5Importer(const std::string &path);
+
+public:
+  /**
+   * @note importGroup has to be called before readTensor is called
+   *        Otherwise, readTensor will throw an exception
+   */
+  void importGroup(const std::string &group) { _group = _file.openGroup(group); }
+
+  /**
+   * @brief Read tensor data from file and store it into buffer
+   * @details A tensor in the file can be retrieved with (data_idx, input_idx)
+   * @param data_idx : index of the data
+   * @param input_idx : index of the input
+   * @param dtype : pointer to write the tensor's data type
+   * @param shape : pointer to write the tensor's shape
+   * @param buffer : pointer to write the tensor's data
+   */
+  void readTensor(int32_t data_idx, int32_t input_idx, loco::DataType *dtype,
+                  std::vector<loco::Dimension> *shape, void *buffer);
+
+  // Read a raw tensor (no type/shape is specified)
+  void readTensor(int32_t data_idx, int32_t input_idx, void *buffer);
+
+  bool isRawData() { return _group.attrExists("rawData"); }
+
+  int32_t numData() { return _group.getNumObjs(); }
+
+  int32_t numInputs(int32_t data_idx);
+
+private:
+  H5::H5File _file;
+  H5::Group _group;
+};
+
+} // namespace hdf5
+} // namespace dio
+
+#endif // __DIO_HDF5_H__
diff --git a/compiler/dio-hdf5/requires.cmake b/compiler/dio-hdf5/requires.cmake
new file mode 100644 (file)
index 0000000..44f6870
--- /dev/null
@@ -0,0 +1 @@
+require("loco")
diff --git a/compiler/dio-hdf5/src/HDF5Importer.cpp b/compiler/dio-hdf5/src/HDF5Importer.cpp
new file mode 100644 (file)
index 0000000..9ae556b
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dio_hdf5/HDF5Importer.h"
+
+#include <H5Cpp.h>
+
+#include <string>
+#include <vector>
+#include <cassert>
+#include <stdexcept>
+
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace
+{
+
+Shape toInternalShape(const H5::DataSpace &dataspace)
+{
+  int rank = dataspace.getSimpleExtentNdims();
+
+  std::vector<hsize_t> dims;
+  dims.resize(rank, 0);
+  dataspace.getSimpleExtentDims(dims.data());
+
+  Shape res;
+  for (int axis = 0; axis < rank; ++axis)
+  {
+    res.emplace_back(dims[axis]);
+  }
+
+  return res;
+}
+
+DataType toInternalDtype(const H5::DataType &h5_type)
+{
+  if (h5_type == H5::PredType::IEEE_F32BE || h5_type == H5::PredType::IEEE_F32LE)
+  {
+    return DataType::FLOAT32;
+  }
+  if (h5_type == H5::PredType::STD_I32BE || h5_type == H5::PredType::STD_I32LE)
+  {
+    return DataType::S32;
+  }
+  if (h5_type == H5::PredType::STD_I64BE || h5_type == H5::PredType::STD_I64LE)
+  {
+    return DataType::S64;
+  }
+  if (h5_type.getClass() == H5T_class_t::H5T_ENUM)
+  {
+    // We follow the numpy format
+    // In numpy 1.19.0, np.bool_ is saved as H5T_ENUM
+    // - (name, value) -> (FALSE, 0) and (TRUE, 1)
+    // - value dtype is H5T_STD_I8LE
+    // TODO Find a general way to recognize BOOL type
+    char name[10];
+    int8_t value[2] = {0, 1};
+    if (H5Tenum_nameof(h5_type.getId(), value, name, 10) < 0)
+      return DataType::Unknown;
+
+    if (std::string(name) != "FALSE")
+      return DataType::Unknown;
+
+    if (H5Tenum_nameof(h5_type.getId(), value + 1, name, 10) < 0)
+      return DataType::Unknown;
+
+    if (std::string(name) != "TRUE")
+      return DataType::Unknown;
+
+    return DataType::BOOL;
+  }
+  // TODO Support more datatypes
+  return DataType::Unknown;
+}
+
+void readTensorData(H5::DataSet &tensor, uint8_t *buffer)
+{
+  tensor.read(buffer, H5::PredType::NATIVE_UINT8);
+}
+
+void readTensorData(H5::DataSet &tensor, float *buffer)
+{
+  tensor.read(buffer, H5::PredType::NATIVE_FLOAT);
+}
+
+void readTensorData(H5::DataSet &tensor, int32_t *buffer)
+{
+  tensor.read(buffer, H5::PredType::NATIVE_INT);
+}
+
+void readTensorData(H5::DataSet &tensor, int64_t *buffer)
+{
+  tensor.read(buffer, H5::PredType::NATIVE_LONG);
+}
+
+} // namespace
+
+namespace dio
+{
+namespace hdf5
+{
+
+HDF5Importer::HDF5Importer(const std::string &path)
+{
+  if (_file.isHdf5(path) == false)
+    throw std::runtime_error("Given data file is not HDF5");
+
+  _file = H5::H5File(path, H5F_ACC_RDONLY);
+}
+
+int32_t HDF5Importer::numInputs(int32_t record_idx)
+{
+  auto records = _group.openGroup(std::to_string(record_idx));
+  return records.getNumObjs();
+}
+
+void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffer)
+{
+  auto record = _group.openGroup(std::to_string(record_idx));
+  auto tensor = record.openDataSet(std::to_string(input_idx));
+
+  readTensorData(tensor, static_cast<uint8_t *>(buffer));
+}
+
+void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
+                              void *buffer)
+{
+  auto record = _group.openGroup(std::to_string(record_idx));
+  auto tensor = record.openDataSet(std::to_string(input_idx));
+
+  auto tensor_dtype = tensor.getDataType();
+  *dtype = toInternalDtype(tensor_dtype);
+
+  auto tensor_shape = tensor.getSpace();
+  *shape = toInternalShape(tensor_shape);
+
+  switch (*dtype)
+  {
+    case DataType::FLOAT32:
+      readTensorData(tensor, static_cast<float *>(buffer));
+      break;
+    case DataType::S32:
+      readTensorData(tensor, static_cast<int32_t *>(buffer));
+      break;
+    case DataType::S64:
+      readTensorData(tensor, static_cast<int64_t *>(buffer));
+      break;
+    case DataType::BOOL:
+      readTensorData(tensor, static_cast<uint8_t *>(buffer));
+      break;
+    default:
+      throw std::runtime_error{"Unsupported data type for input data (.h5)"};
+  }
+}
+
+} // namespace hdf5
+} // namespace dio
diff --git a/compiler/dio-hdf5/src/HDF5Importer.test.cpp b/compiler/dio-hdf5/src/HDF5Importer.test.cpp
new file mode 100644 (file)
index 0000000..61a027f
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dio_hdf5/HDF5Importer.h"
+
+#include <loco.h>
+
+#include <H5Cpp.h>
+
+#include <cstdio>
+
+#include <gtest/gtest.h>
+
+using HDF5Importer = dio::hdf5::HDF5Importer;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
+
+namespace
+{
+
+const std::string file_name("dio_hdf5_test.h5");
+
+void createFile()
+{
+  // File already exists. Remove it.
+  if (auto f = fopen(file_name.c_str(), "r"))
+  {
+    fclose(f);
+    if (remove(file_name.c_str()) != 0)
+      throw std::runtime_error("Error deleting file.");
+  }
+
+  const auto rank = 3;
+  hsize_t dim[3] = {1, 2, 3};
+  H5::DataSpace space(rank, dim);
+
+  float data[] = {0, 1, 2, 3, 4, 5};
+
+  // Create test file in the current directory
+  H5::H5File file(file_name, H5F_ACC_TRUNC);
+  {
+    file.createGroup("/value");
+    file.createGroup("/value/0");
+    H5::DataSet dataset(file.createDataSet("/value/0/0", H5::PredType::IEEE_F32BE, space));
+    dataset.write(data, H5::PredType::IEEE_F32LE);
+  }
+}
+
+} // namespace
+
+TEST(dio_hdf5_test, read_with_type_shape)
+{
+  createFile();
+
+  HDF5Importer h5(::file_name);
+
+  h5.importGroup("value");
+
+  std::vector<float> buffer(6);
+
+  DataType dtype;
+  Shape shape;
+  h5.readTensor(0, 0, &dtype, &shape, buffer.data());
+
+  for (uint32_t i = 0; i < 6; i++)
+    EXPECT_EQ(i, buffer[i]);
+
+  EXPECT_EQ(DataType::FLOAT32, dtype);
+  EXPECT_EQ(3, shape.size());
+  EXPECT_EQ(1, shape[0]);
+  EXPECT_EQ(2, shape[1]);
+  EXPECT_EQ(3, shape[2]);
+}
+
+TEST(dio_hdf5_test, wrong_path_NEG)
+{
+  const std::string wrong_path = "not_existing_file_for_dio_hdf5_test";
+
+  EXPECT_ANY_THROW(HDF5Importer h5(wrong_path));
+}
+
+TEST(dio_hdf5_test, wrong_group_name_NEG)
+{
+  createFile();
+
+  HDF5Importer h5(::file_name);
+
+  EXPECT_ANY_THROW(h5.importGroup("wrong"));
+}
+
+TEST(dio_hdf5_test, data_out_of_index_NEG)
+{
+  createFile();
+
+  HDF5Importer h5(::file_name);
+
+  h5.importGroup("value");
+
+  std::vector<float> buffer(6);
+
+  DataType dtype;
+  Shape shape;
+  // Read non-existing data (data_idx = 1)
+  EXPECT_ANY_THROW(h5.readTensor(1, 0, &dtype, &shape, buffer.data()));
+}
+
+TEST(dio_hdf5_test, input_out_of_index_NEG)
+{
+  createFile();
+
+  HDF5Importer h5(::file_name);
+
+  h5.importGroup("value");
+
+  std::vector<float> buffer(6);
+
+  DataType dtype;
+  Shape shape;
+  // Read non-existing input (input_idx = 1)
+  EXPECT_ANY_THROW(h5.readTensor(0, 1, &dtype, &shape, buffer.data()));
+}
index 9254cc9a7b4967e1a689d17345a49cb79105029f..c25dc5fb46079399e9a80cc9e201558d04d8e143 100755 (executable)
@@ -217,4 +217,21 @@ op_version()
   echo ${ACTUAL}
 }
 
+tensor_dtype()
+{
+  argc_check $# 1
+  file_path_check ${COMPILED_FILE}
+  file_path_check ${INSPECT_PROG_PATH}
+
+  set -o pipefail
+
+  ACTUAL=`init_error_log ; \
+          ${INSPECT_PROG_PATH} --tensor_dtype ${COMPILED_FILE} | \
+          awk -v tensor_name="$1" '{ if ($1 == tensor_name) print $2}'`
+
+  check_success_exit_code $? 0
+
+  echo ${ACTUAL}
+}
+
 # TODO define more quality test functions
diff --git a/compiler/embedded-import-value-test/.gitignore b/compiler/embedded-import-value-test/.gitignore
new file mode 100644 (file)
index 0000000..8dbfa90
--- /dev/null
@@ -0,0 +1 @@
+/test.local.lst
diff --git a/compiler/embedded-import-value-test/CMakeLists.txt b/compiler/embedded-import-value-test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..785edfc
--- /dev/null
@@ -0,0 +1,34 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+set(SRCS_TEST_DRIVER src/TestDriver.cpp)
+
+# create driver
+add_executable(test_driver ${SRCS_TEST_DRIVER})
+target_link_libraries(test_driver PRIVATE luci_interpreter_import)
+target_link_libraries(test_driver PRIVATE luci_interpreter)
+target_link_libraries(test_driver PRIVATE safemain)
+
+unset(EMBEDDED_IMPORT_VALUE_TESTS)
+
+macro(addeval NAME)
+  list(APPEND EMBEDDED_IMPORT_VALUE_TESTS ${NAME})
+endmacro(addeval)
+
+# Read "test.lst"
+include("test.lst")
+# Read "test.local.lst" if exists
+include("test.local.lst" OPTIONAL)
+
+# Generate dependencies
+add_custom_target(embedded_import_testfiles ALL DEPENDS ${TESTFILES})
+
+get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+add_test(NAME embedded_import_value_test
+  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+          "${CMAKE_CURRENT_BINARY_DIR}"
+          "${ARTIFACTS_BIN_PATH}"
+          ${EMBEDDED_IMPORT_VALUE_TESTS}
+)
diff --git a/compiler/embedded-import-value-test/README.md b/compiler/embedded-import-value-test/README.md
new file mode 100644 (file)
index 0000000..71a9548
--- /dev/null
@@ -0,0 +1,13 @@
+# embedded-import-value-test
+
+`embedded-import-value-test` checks that models imported with and without constant copying produce the same output values.
+
+The test proceeds as follows:
+
+1. Generate random input for provided circle model.
+
+2. Import circle model to luci in 2 modes:
+   - With constant copying (default mode).
+   - Without constant copying (experimental feature)
+
+3. Compare the execution result of both modes. The result must be the same.
diff --git a/compiler/embedded-import-value-test/evalverify.sh b/compiler/embedded-import-value-test/evalverify.sh
new file mode 100755 (executable)
index 0000000..a99e76f
--- /dev/null
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# This script verifies that models imported without constant copying execute correctly in luci_interpreter
+#
+# HOW TO USE
+#
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# bin_dir  : build directory of embedded-import-value-test (ex: build/compiler/embedded-import-value-test)
+# work_dir : artifacts directory where test materials exist
+
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+TEST_DRIVER_PATH="${BINDIR}/test_driver"
+TEST_RESULT_DIR="${BINDIR}/result"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+mkdir -p "${TEST_RESULT_DIR}"
+for TESTCASE in "$@"; do
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${TEST_RESULT_DIR}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}.log" <(
+    exec 2>&1
+    set -ex
+
+    "${TEST_DRIVER_PATH}" --model "${TESTCASE_FILE}.circle"
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("${TESTCASE}")
+  else
+    FAILED+=("${TESTCASE}")
+  fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/embedded-import-value-test/requires.cmake b/compiler/embedded-import-value-test/requires.cmake
new file mode 100644 (file)
index 0000000..f8af5f2
--- /dev/null
@@ -0,0 +1,6 @@
+require("common-artifacts")
+require("luci")
+require("luci-interpreter")
+require("safemain")
+require("oops")
+require("loco")
diff --git a/compiler/embedded-import-value-test/src/TestDriver.cpp b/compiler/embedded-import-value-test/src/TestDriver.cpp
new file mode 100644 (file)
index 0000000..63fd745
--- /dev/null
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <luci_interpreter/GraphBuilderRegistry.h>
+#include <luci_interpreter/Interpreter.h>
+
+#include <luci/Importer.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <string>
+#include <random>
+
+namespace
+{
+
+uint32_t tensor_size_of(const luci::CircleNode *node)
+{
+  uint32_t tensor_size = loco::size(node->dtype());
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    tensor_size *= node->dim(i).value();
+  return tensor_size;
+}
+
+std::vector<uint8_t> random_data_for(const luci::CircleInput *node)
+{
+  // allocate data buffer
+  std::vector<uint8_t> inputs_data(tensor_size_of(node));
+  auto *buffer = inputs_data.data();
+
+  // define size of buffer in elements
+  const auto dtype = node->dtype();
+  assert(inputs_data.size() % loco::size(dtype) == 0); // FIX ME UNLESS
+  const auto element_count = inputs_data.size() / loco::size(dtype);
+
+  // random generator engine
+  std::random_device device;
+  std::mt19937 engine{device()};
+
+  // fill buffer with random data
+  switch (node->dtype())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      auto element_buffer = reinterpret_cast<float *>(buffer);
+
+      std::uniform_real_distribution<float> distrib(-3, 3);
+      const auto generator = [&distrib, &engine]() { return distrib(engine); };
+      std::generate(element_buffer, element_buffer + element_count, generator);
+
+      break;
+    }
+    case loco::DataType::U8:
+    {
+      auto element_buffer = buffer;
+
+      std::uniform_int_distribution<uint8_t> distrib(100, 200);
+      const auto generator = [&distrib, &engine]() { return distrib(engine); };
+      std::generate(element_buffer, element_buffer + element_count, generator);
+
+      break;
+    }
+    case loco::DataType::S16:
+    {
+      auto element_buffer = reinterpret_cast<int16_t *>(buffer);
+
+      std::uniform_int_distribution<int16_t> distrib(0, 100);
+      const auto generator = [&distrib, &engine]() { return distrib(engine); };
+      std::generate(element_buffer, element_buffer + element_count, generator);
+
+      break;
+    }
+    case loco::DataType::S32:
+    {
+      auto element_buffer = reinterpret_cast<int32_t *>(buffer);
+
+      std::uniform_int_distribution<int32_t> distrib(0, 100);
+      const auto generator = [&distrib, &engine]() { return distrib(engine); };
+      std::generate(element_buffer, element_buffer + element_count, generator);
+
+      break;
+    }
+    case loco::DataType::BOOL:
+    {
+      // bool data is stored as uint8_t values restricted to the [0, 1] range
+      auto element_buffer = buffer;
+
+      std::uniform_int_distribution<uint8_t> distrib(0, 1);
+      const auto generator = [&distrib, &engine]() { return distrib(engine); };
+      std::generate(element_buffer, element_buffer + element_count, generator);
+
+      break;
+    }
+    default:
+      // TODO Support other dtypes
+      throw std::runtime_error("Unsupported data type, yet!");
+  }
+
+  return inputs_data;
+}
+
+} // namespace
+
+int entry(int argc, char **argv)
+{
+  // check arguments
+  if (argc != 3 || std::string(argv[1]) != "--model")
+  {
+    std::cerr << "Usage: " << argv[0] << " --model <path/to/model>" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // open file with model
+  const auto model_file = std::string(argv[2]);
+  std::ifstream fs(model_file, std::ifstream::binary);
+  if (fs.fail())
+  {
+    std::cerr << "Cannot open model file \"" << model_file << "\"." << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // create constant circle model
+  const std::vector<char> model_buffer((std::istreambuf_iterator<char>(fs)),
+                                       std::istreambuf_iterator<char>());
+  const auto circle_model = circle::GetModel(model_buffer.data());
+
+  // create random model's inputs
+  std::vector<std::vector<uint8_t>> inputs_data;
+  {
+    // model inputs
+    auto model = luci::Importer(nullptr).importModule(circle_model);
+    const auto inputs = loco::input_nodes(model->graph());
+
+    // create random data for each input
+    for (const auto *input : inputs)
+    {
+      const auto input_node = loco::must_cast<const luci::CircleInput *>(input);
+      inputs_data.emplace_back(random_data_for(input_node));
+    }
+  }
+
+  // interpret given module
+  const auto interpret_module_and_compute_output =
+    [&](const std::unique_ptr<luci::Module> &module) {
+      // create interpreter
+      luci_interpreter::Interpreter interpreter(module.get());
+
+      // model's input and output nodes
+      const auto input_nodes = loco::input_nodes(module->graph());
+      const auto output_nodes = loco::output_nodes(module->graph());
+
+      // set inputs
+      for (uint32_t i = 0; i < input_nodes.size(); ++i)
+      {
+        const auto input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[i]);
+        const auto &data = inputs_data.at(i);
+        interpreter.writeInputTensor(input_node, data.data(), data.size());
+      }
+
+      // do inference
+      interpreter.interpret();
+
+      // read outputs
+      std::vector<std::vector<uint8_t>> outputs_data;
+      for (const auto *node : output_nodes)
+      {
+        const auto output_node = loco::must_cast<const luci::CircleOutput *>(node);
+
+        // allocate output buffer
+        outputs_data.emplace_back(tensor_size_of(output_node));
+
+        auto &data = outputs_data.back();
+        interpreter.readOutputTensor(output_node, data.data(), data.size());
+      }
+
+      return outputs_data;
+    };
+
+  // import with copying, execute and save
+  std::vector<std::vector<uint8_t>> outputs_data_1;
+  {
+    const auto default_source = &luci::GraphBuilderRegistry::get();
+    const auto module = luci::Importer(default_source).importModule(circle_model);
+    if (not module)
+    {
+      std::cerr << "Fail to import model with constant copying." << std::endl;
+      return EXIT_FAILURE;
+    }
+
+    outputs_data_1 = interpret_module_and_compute_output(module);
+  }
+
+  // import without copying, execute and save
+  std::vector<std::vector<uint8_t>> outputs_data_2;
+  {
+    const auto optimized_source = luci_interpreter::source_without_constant_copying();
+    const auto module = luci::Importer(optimized_source.get()).importModule(circle_model);
+    if (not module)
+    {
+      std::cerr << "Fail to import model without constant copying." << std::endl;
+      return EXIT_FAILURE;
+    }
+
+    outputs_data_2 = interpret_module_and_compute_output(module);
+  }
+
+  // check all tensors are equal
+  assert(outputs_data_1.size() == outputs_data_2.size());
+  for (uint32_t n = 0; n < outputs_data_1.size(); ++n)
+  {
+    const auto &output_1 = outputs_data_1.at(n);
+    const auto &output_2 = outputs_data_2.at(n);
+    assert(output_1.size() == output_2.size());
+
+    for (uint32_t o = 0; o < output_1.size(); ++o)
+    {
+      if (output_1[o] != output_2[o])
+      {
+        std::cerr << "Values mismatch in model's output number " << n << std::endl;
+        return EXIT_FAILURE;
+      }
+    }
+  }
+
+  std::cout << "[TEST PASSED]" << std::endl;
+  return EXIT_SUCCESS;
+}
diff --git a/compiler/embedded-import-value-test/test.lst b/compiler/embedded-import-value-test/test.lst
new file mode 100644 (file)
index 0000000..924a60d
--- /dev/null
@@ -0,0 +1,192 @@
+#addeval(Abs_000)
+addeval(Add_000)
+#addeval(Add_001)
+addeval(Add_U8_000)
+#addeval(AddN_000)
+addeval(ArgMax_000)
+addeval(ArgMax_001)
+addeval(ArgMax_002)
+addeval(ArgMax_003)
+addeval(ArgMax_U8_000)
+addeval(ArgMax_U8_001)
+addeval(ArgMax_U8_002)
+addeval(ArgMax_U8_003)
+#addeval(ArgMin_000)
+#addeval(ArgMin_001)
+#addeval(ArgMin_002)
+#addeval(ArgMin_003)
+#addeval(ArgMin_U8_000)
+#addeval(ArgMin_U8_001)
+#addeval(ArgMin_U8_002)
+#addeval(ArgMin_U8_003)
+addeval(AveragePool2D_000)
+#addeval(BatchMatMul_000)
+#addeval(BatchMatMulV2_000)
+#addeval(BatchMatMulV2_001)
+#addeval(BatchToSpaceND_000)
+addeval(Cast_000)
+addeval(Cast_001)
+#addeval(Ceil_000)
+addeval(Concatenation_000)
+addeval(Concatenation_U8_000)
+addeval(Conv2D_000)
+addeval(Conv2D_001)
+addeval(Conv2D_002)
+addeval(Conv2D_003)
+addeval(Conv2D_U8_000)
+addeval(Conv2D_U8_001)
+#addeval(Cos_000)
+addeval(DepthToSpace_000)
+addeval(DepthwiseConv2D_000)
+addeval(DepthwiseConv2D_U8_000)
+#addeval(DepthwiseConv2D_U8_001)
+addeval(DepthwiseConv2D_001)
+addeval(Div_000)
+addeval(ELU_000)
+addeval(Equal_000)
+addeval(Exp_000)
+#addeval(ExpandDims_000)
+#addeval(ExpandDims_001)
+#addeval(ExpandDims_002)
+#addeval(ExpandDims_003)
+#addeval(Fill_000)
+#addeval(Fill_001)
+addeval(Floor_000)
+#addeval(FloorDiv_000)
+#addeval(FloorDiv_001)
+#addeval(FloorMod_000)
+#addeval(FloorMod_001)
+addeval(FullyConnected_000)
+addeval(FullyConnected_001)
+addeval(FullyConnected_002)
+#addeval(FullyConnected_U8_000)
+addeval(Gather_000)
+#addeval(GatherNd_000)
+#addeval(Greater_000)
+#addeval(GreaterEqual_000)
+addeval(If_000)
+addeval(If_001)
+addeval(L2Normalize_000)
+addeval(L2Pool2D_000)
+#addeval(L2Pool2D_U8_000)
+addeval(LeakyRelu_000)
+addeval(Less_000)
+addeval(LessEqual_000)
+addeval(LocalResponseNormalization_000)
+#addeval(Log_000)
+addeval(LogicalAnd_000)
+addeval(LogicalNot_000)
+addeval(LogicalOr_000)
+addeval(Logistic_000)
+addeval(LogSoftmax_000)
+#addeval(MatMul_000)
+#addeval(MatrixDiag_000)
+#addeval(MatrixSetDiag_000)
+addeval(Maximum_000)
+addeval(MaxPool2D_000)
+addeval(MaxPool2D_U8_000)
+addeval(Mean_000)
+addeval(Mean_001)
+#addeval(Mean_U8_000)
+#addeval(Minimum_000)
+#addeval(MirrorPad_000)
+addeval(Mul_000)
+#addeval(Mul_U8_000)
+addeval(Neg_000)
+addeval(NotEqual_000)
+addeval(OneHot_000)
+addeval(OneHot_001)
+addeval(OneHot_002)
+#addeval(OneHot_003)
+addeval(Pack_000)
+addeval(Pack_U8_000)
+addeval(Pad_000)
+addeval(Pad_U8_000)
+addeval(Pow_000)
+addeval(PRelu_000)
+#addeval(Range_000)
+#addeval(Rank_000)
+#addeval(ReduceAny_000)
+#addeval(ReduceAny_001)
+#addeval(ReduceAny_002)
+#addeval(ReduceAny_003)
+#addeval(ReduceMax_000)
+#addeval(ReduceMin_000)
+#addeval(ReduceProd_000)
+#addeval(ReduceProd_001)
+#addeval(ReduceProd_002)
+#addeval(ReduceProd_003)
+addeval(ReLU_000)
+addeval(ReLU6_000)
+#addeval(ReLUN1To1_000)
+addeval(Reshape_000)
+addeval(Reshape_001)
+addeval(Reshape_002)
+#addeval(Reshape_003)
+addeval(Reshape_U8_000)
+addeval(ResizeBilinear_000)
+addeval(ResizeNearestNeighbor_000)
+#addeval(ReverseSequence_000)
+#addeval(ReverseV2_000)
+#addeval(Round_000)
+addeval(Rsqrt_000)
+#addeval(ScatterNd_000)
+#addeval(SegmentSum_000)
+#addeval(Select_000)
+#addeval(Select_001)
+#addeval(Select_002)
+#addeval(SelectV2_000)
+#addeval(SelectV2_001)
+#addeval(SelectV2_002)
+#addeval(Shape_000)
+addeval(SignatureDef_MultiOut_000)
+addeval(SignatureDef_MultiOut_001)
+#addeval(Sin_000)
+addeval(Slice_000)
+addeval(Softmax_000)
+addeval(Softmax_U8_000)
+addeval(SpaceToBatchND_000)
+addeval(SpaceToBatchND_001)
+addeval(SpaceToBatchND_002)
+addeval(SpaceToBatchND_003)
+addeval(SpaceToDepth_000)
+#addeval(SparseToDense_000)
+addeval(Split_000)
+addeval(SplitV_000)
+addeval(Sqrt_000)
+addeval(Square_000)
+addeval(SquaredDifference_000)
+addeval(Squeeze_000)
+addeval(Squeeze_001)
+addeval(StridedSlice_000)
+addeval(StridedSlice_001)
+addeval(StridedSlice_002)
+addeval(Sub_000)
+addeval(Sub_U8_000)
+#addeval(Sum_000)
+#addeval(Sum_001)
+addeval(SVDF_000)
+addeval(SVDF_001)
+addeval(Tanh_000)
+#addeval(Tile_000)
+#addeval(Tile_U8_000)
+#addeval(TopKV2_000)
+#addeval(TopKV2_001)
+addeval(Transpose_000)
+addeval(TransposeConv_000)
+addeval(Unpack_000)
+addeval(Unpack_001)
+addeval(Unpack_002)
+addeval(Unpack_003)
+#addeval(Where_000)
+#addeval(Where_001)
+#addeval(While_000)
+#addeval(While_001)
+#addeval(While_002)
+#addeval(While_003)
+addeval(YUV_TO_RGB_U8_000)
+#addeval(ZerosLike_000)
+
+# Simple Network test
+addeval(Part_While_000)
+addeval(Part_While_001)
index 17300e25e3367a2e40f1f812c46c475599feb7f3..3702f9501bd642fe2f22afb38975855d5d697048 100644 (file)
@@ -1,4 +1,9 @@
 add_subdirectory(core)
 add_subdirectory(frontend)
 add_subdirectory(cli)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 add_subdirectory(test)
index 25dad2bc638b72e12e9dbfe4e57c649096b57639..19a64231a706c6e64b12e0495b8fcd4521250022 100644 (file)
@@ -20,11 +20,11 @@ target_link_libraries(enco_core PRIVATE morph)
 # Let's use nncc project-wide build options
 target_link_libraries(enco_core PRIVATE nncc_common)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 add_executable(enco_core_test ${TESTS})
 target_include_directories(enco_core_test PRIVATE src)
index 9722392a1eb18d488acb016d0e6ead7c1de9b073..baf7f7bd6f1f3c4d2fec8ca9d867fed2364e0ab1 100644 (file)
@@ -17,11 +17,11 @@ target_link_libraries(enco_caffe_frontend enco_intf_cmdline)
 target_link_libraries(enco_caffe_frontend morph)
 target_link_libraries(enco_caffe_frontend caffeproto)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 nnas_find_package(Caffe QUIET)
 
index b2de2b34b54d3c465baf039c194b4922938cf2b7..995e66f81fccea014c765674c12172706534dbc6 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   return()
@@ -17,16 +17,15 @@ add_library(enco_tflite_frontend SHARED ${SOURCES})
 target_include_directories(enco_tflite_frontend PRIVATE src)
 target_link_libraries(enco_tflite_frontend enco_intf_frontend)
 target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
-target_link_libraries(enco_tflite_frontend flatbuffers-1.10)
 target_link_libraries(enco_tflite_frontend enco_tflite_schema)
 target_link_libraries(enco_tflite_frontend morph)
 target_link_libraries(enco_tflite_frontend cwrap)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 add_executable(enco_tflite_frontend_test ${TESTS})
 target_include_directories(enco_tflite_frontend_test PRIVATE src)
index 9d02f7cba529acf31fe404cb7244e117d62fe009..645db714c1df4c038efe0f28b50de113d68863f1 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   message(STATUS "Build exo: FALSE (missing FlatBuffers)")
@@ -15,7 +15,7 @@ endif(NOT TensorFlowSource_FOUND)
 message(STATUS "Build exo: TRUE")
 
 set(TFLITE_SCHEMA_DIR "${TensorFlowSource_DIR}/tensorflow/lite/schema")
-set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema")
+set(CIRCLE_SCHEMA_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3")
 
 FlatBuffers_Target(exo_tflite_fbs
   OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
index 8fce319538158ed222516500d96d47b6646db738..673d7056c43f9ad35ed807dddc9a941bde8ebc5d 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(hermes_std STATIC ${SOURCES})
-set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(hermes_std PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(hermes_std PUBLIC include)
 target_link_libraries(hermes_std PUBLIC hermes)
 target_link_libraries(hermes_std PRIVATE pepper_strcast)
index e09dd5785f422fa27c43abcdbd67cfd9e1e46496..c55e46a17d01a40763e49817ab609ab77f64a093 100644 (file)
@@ -28,6 +28,10 @@ namespace hermes
 struct ConsoleReporter final : public hermes::Sink
 {
   void notify(const Message *m) final;
+  void set_colored_mode(bool is_colored) { _is_colored = is_colored; }
+
+private:
+  bool _is_colored = false;
 };
 
 } // namespace hermes
index 3cc9f09ed76cda229d4dc561fc675e6dda3004fd..524ed59d843844fa006e1d2b0330659fc16922c8 100644 (file)
 #include "hermes/ConsoleReporter.h"
 
 #include <iostream>
+#include <cstdlib>
+#include <string>
 
 namespace hermes
 {
 
+static constexpr const char *kTermColorRedTextCode = "\033[0;31m";
+static constexpr const char *kTermColorGreenTextCode = "\033[0;32m";
+static constexpr const char *kTermColorOrangeTextCode = "\033[0;33m";
+static constexpr const char *kTermColorBlueTextCode = "\033[0;34m";
+static constexpr const char *kTermColorMagentaTextCode = "\033[0;35m";
+static constexpr const char *kTermColorCyanTextCode = "\033[0;36m";
+static constexpr const char *kTermColorWhiteTextCode = "\033[0;37m";
+
+static constexpr const char *kTermBoldTextCode = "\033[1m";
+static constexpr const char *kTermUnderlineTextCode = "\033[4m";
+static constexpr const char *kTermInverseTextCode = "\033[7m";
+static constexpr const char *kTermBoldOffTextCode = "\033[21m";
+static constexpr const char *kTermUnderlineOffTextCode = "\033[24m";
+static constexpr const char *kTermInverseOffTextCode = "\033[27m";
+
+static constexpr const char *kTermColorResetAllCode = "\033[0m";
+
 void ConsoleReporter::notify(const hermes::Message *m)
 {
+  const char *env_color_p = std::getenv("ONE_HERMES_COLOR");
+  if (env_color_p)
+  {
+    auto env_color_str = std::string(env_color_p);
+    if ((env_color_str == "1") or (env_color_str == "ON"))
+      _is_colored = true;
+  }
+
+  if (_is_colored)
+  {
+    switch (m->get_severity())
+    {
+      case FATAL:
+        std::cout << kTermColorRedTextCode << kTermBoldTextCode << kTermUnderlineTextCode;
+        break;
+      case ERROR:
+        std::cout << kTermColorRedTextCode;
+        break;
+      case WARN:
+        std::cout << kTermColorOrangeTextCode;
+        break;
+      case INFO:
+        std::cout << kTermColorGreenTextCode;
+        break;
+      case VERBOSE:
+        std::cout << kTermColorResetAllCode;
+        break;
+    };
+  }
   for (uint32_t n = 0; n < m->text()->lines(); ++n)
   {
     std::cout << m->text()->line(n) << std::endl;
   }
+  if (_is_colored)
+  {
+    std::cout << kTermColorResetAllCode;
+  }
 }
 
 } // namespace hermes
index a65585a6a7913177f029daa28b68a621d58c35fa..d959ff3d91c210600a91dcdfd3dfdb1e301a633d 100644 (file)
@@ -43,3 +43,168 @@ TEST(ConsoleReporterTest, notify)
 
   ASSERT_NO_THROW(r.notify(&m));
 }
+
+TEST(ConsoleReporterTest, notify_fatal)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is colored as FATAL" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL);
+  }
+
+  hermes::ConsoleReporter r;
+
+  r.set_colored_mode(true);
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_error)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is colored as ERROR" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
+  }
+
+  hermes::ConsoleReporter r;
+
+  r.set_colored_mode(true);
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_warn)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is colored as WARN" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN);
+  }
+
+  hermes::ConsoleReporter r;
+
+  r.set_colored_mode(true);
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_info)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is colored as INFO" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO);
+  }
+
+  hermes::ConsoleReporter r;
+
+  r.set_colored_mode(true);
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_verbose)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is colored as VERBOSE" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE);
+  }
+
+  hermes::ConsoleReporter r;
+
+  r.set_colored_mode(true);
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_fatal_NEG)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is not colored as FATAL" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::FATAL);
+  }
+
+  hermes::ConsoleReporter r;
+
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_error_NEG)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is not colored as ERROR" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::ERROR);
+  }
+
+  hermes::ConsoleReporter r;
+
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_warn_NEG)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is not colored as WARN" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::WARN);
+  }
+
+  hermes::ConsoleReporter r;
+
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_info_NEG)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is not colored as INFO" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::INFO);
+  }
+
+  hermes::ConsoleReporter r;
+
+  ASSERT_NO_THROW(r.notify(&m));
+}
+
+TEST(ConsoleReporterTest, notify_verbose_NEG)
+{
+  hermes::Message m;
+  {
+    std::stringstream ss;
+
+    ss << "This message is not colored as VERBOSE" << std::endl;
+
+    m.text(std::make_unique<hermes::MessageText>(ss), hermes::VERBOSE);
+  }
+
+  hermes::ConsoleReporter r;
+
+  ASSERT_NO_THROW(r.notify(&m));
+}
index e1a71c2b47bcd67f1ea6adf60d87314053bdd4e8..d33e2d735e3e7c4b05824a58b1a3feca5abead73 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(hermes STATIC ${SOURCES})
-set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(hermes PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(hermes PUBLIC include)
 # Let's apply nncc common compile options
 #
index 460163f649f91fd879f1e4b088e471c0e2477c35..d76f0eb6f333aea542eaca004ebaeb2196830257 100644 (file)
@@ -17,6 +17,8 @@
 #ifndef __HERMES_MESSAGE_H__
 #define __HERMES_MESSAGE_H__
 
+#include "Severity.h"
+
 #include <memory>
 #include <sstream>
 #include <string>
@@ -48,7 +50,6 @@ private:
  * @brief Message with metadata
  *
  * TODO Add "Timestamp" field
- * TODO Add "Severity" field
  * TODO Support extensible "attribute" annotation
  */
 class Message final
@@ -58,10 +59,17 @@ public:
 
 public:
   void text(std::unique_ptr<MessageText> &&text) { _text = std::move(text); }
+  void text(std::unique_ptr<MessageText> &&text, SeverityCategory severity)
+  {
+    _text = std::move(text);
+    _severity = severity;
+  }
   const MessageText *text(void) const { return _text.get(); }
+  SeverityCategory get_severity(void) const { return _severity; }
 
 private:
   std::unique_ptr<MessageText> _text;
+  SeverityCategory _severity = SeverityCategory::INFO;
 };
 
 } // namespace hermes
index a2f1de74d0c0f83b79c88d9965276b96a39aaedc..1e2e9b9dc5065ca7004cd4dcd9c306f1b728b2e4 100644 (file)
@@ -18,6 +18,7 @@
 #define __HERMES_MESSAGE_BUFFER_H__
 
 #include "hermes/core/MessageBus.h"
+#include "hermes/core/Severity.h"
 
 #include <ostream>
 #include <sstream>
@@ -34,6 +35,7 @@ class MessageBuffer final
 {
 public:
   MessageBuffer(MessageBus *);
+  MessageBuffer(MessageBus *bus, SeverityCategory severity);
   ~MessageBuffer();
 
 public:
@@ -41,6 +43,7 @@ public:
 
 private:
   MessageBus *_bus;
+  SeverityCategory _severity = SeverityCategory::INFO;
 
   /// @brief Content buffer
   std::stringstream _ss;
index a4ff4eeffe76b7c0d2c0d522167e1120a987dade..ce1f176d9ef43e787121456240d83d5d0a437b95 100644 (file)
@@ -26,13 +26,19 @@ MessageBuffer::MessageBuffer(MessageBus *bus) : _bus{bus}
   // DO NOTHING
 }
 
+MessageBuffer::MessageBuffer(MessageBus *bus, SeverityCategory severity)
+  : _bus{bus}, _severity{severity}
+{
+  // DO NOTHING
+}
+
 MessageBuffer::~MessageBuffer()
 {
   // NOTE The current implementation is unsafe as it may throw an excpetion.
   // TODO Find a better safe implementation.
   auto msg = std::make_unique<Message>();
 
-  msg->text(std::make_unique<MessageText>(_ss));
+  msg->text(std::make_unique<MessageText>(_ss), _severity);
 
   _bus->post(std::move(msg));
 }
index d124f44304059e2a617f7024b9bd2190571174a3..cb60d9a312463eac5a13dc7a672e3991cc8cd90f 100644 (file)
@@ -60,10 +60,9 @@ void Source::deactivate(void)
 
 void Source::reload(const Config *c) { c->configure(this, _setting); }
 
-std::unique_ptr<MessageBuffer> Source::buffer(const Severity &) const
+std::unique_ptr<MessageBuffer> Source::buffer(const Severity &severity) const
 {
-  // TODO Pass Severity
-  return std::make_unique<MessageBuffer>(_bus);
+  return std::make_unique<MessageBuffer>(_bus, severity.category());
 }
 
 } // namespace hermes
index 308f486199ecefdc62221b9c2e339b7a5184bce7..34835e48351ebc6a571e0ede0e9409768ae523fe 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(locomotiv STATIC ${SOURCES})
-set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(locomotiv PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif (NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(locomotiv PUBLIC include)
 target_include_directories(locomotiv PRIVATE src)
 target_link_libraries(locomotiv PUBLIC loco)
index f02fb1a72ab8f0a6e15dca519abe0fe3dc241ab8..43ec41af47699876bf72a79b8dccc95b2980ea8b 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(locop STATIC ${SOURCES})
-set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(locop PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(locop PUBLIC include)
 target_link_libraries(locop PUBLIC loco)
 # Let's apply nncc common compile options
index 3bc71dbd00f678cf60fb7c60d100f3cb8d387d37..374794f90690916a70e7d3c77626c3924fb91034 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(logo_core STATIC ${SOURCES})
-set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(logo_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(logo_core PRIVATE src)
 target_include_directories(logo_core PUBLIC include)
 target_link_libraries(logo_core PUBLIC loco)
diff --git a/compiler/logo-ex/CMakeLists.txt b/compiler/logo-ex/CMakeLists.txt
new file mode 100644 (file)
index 0000000..31d7602
--- /dev/null
@@ -0,0 +1,23 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(logo_ex STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(logo_ex PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(logo_ex PRIVATE src)
+target_include_directories(logo_ex PUBLIC include)
+target_link_libraries(logo_ex PUBLIC loco)
+target_link_libraries(logo_ex PUBLIC logo_core)
+target_link_libraries(logo_ex PRIVATE locomotiv)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(logo_ex_test ${TESTS})
+target_include_directories(logo_ex_test PRIVATE src)
+target_link_libraries(logo_ex_test logo_ex)
diff --git a/compiler/logo-ex/README.md b/compiler/logo-ex/README.md
new file mode 100644 (file)
index 0000000..8ea55a2
--- /dev/null
@@ -0,0 +1,6 @@
+# logo-ex
+
+_logo-ex_ provides _loco_ Extended Graph Passes for Transformation and Optimization
+that gets help from _locomotiv_
+
+NOTE: f2e7c38dcc601cb290c380d8314a3ae627923f58 is where this came from
diff --git a/compiler/logo-ex/include/logo/ConstantFoldingPass.h b/compiler/logo-ex/include/logo/ConstantFoldingPass.h
new file mode 100644 (file)
index 0000000..9143ae4
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_EX_CONSTANT_FOLDING_PASS_H__
+#define __LOGO_EX_CONSTANT_FOLDING_PASS_H__
+
+#include <logo/Pass.h>
+
+#include <loco.h>
+
+namespace logo
+{
+
+/**
+ * @brief  Performs constant folding optimization
+ */
+class ConstantFoldingPass : public Pass
+{
+public:
+  const char *name(void) const final { return "ConstantFoldingPass"; }
+
+public:
+  bool run(loco::Graph *graph) override;
+};
+
+} // namespace logo
+
+#endif // __LOGO_EX_CONSTANT_FOLDING_PASS_H__
diff --git a/compiler/logo-ex/include/logo/PassesEx.h b/compiler/logo-ex/include/logo/PassesEx.h
new file mode 100644 (file)
index 0000000..8bdf93b
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LOGO_PASSES_EX_H__
+#define __LOGO_PASSES_EX_H__
+
+// Please keep this in alphabetical order
+
+#include <logo/ConstantFoldingPass.h>
+
+#endif // __LOGO_PASSES_EX_H__
diff --git a/compiler/logo-ex/requires.cmake b/compiler/logo-ex/requires.cmake
new file mode 100644 (file)
index 0000000..c761833
--- /dev/null
@@ -0,0 +1,3 @@
+require("loco")
+require("logo-core")
+require("locomotiv")
diff --git a/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.cpp
new file mode 100644 (file)
index 0000000..97d7545
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ConstantFoldingPass.h>
+
+#include <loco.h>
+#include <loco/IR/CanonicalDialect.h>
+
+#include <locomotiv/Session.h>
+
+#include <cassert>
+#include <stdexcept>
+
+namespace
+{
+
+uint64_t num_elements(const loco::NodeMixin<loco::NodeTrait::TensorShape> &shape)
+{
+  if (shape.rank() == 0)
+  {
+    return 0;
+  }
+
+  uint64_t res = 1;
+
+  for (uint32_t axis = 0; axis < shape.rank(); ++axis)
+  {
+    assert(shape.dim(axis).known());
+    res *= shape.dim(axis).value();
+  }
+
+  return res;
+}
+
+/// @brief For some op, constant folding should not be performed. This returns true if node is such
+/// op.
+bool skip(const loco::Node *node)
+{
+  static std::set<uint32_t> skip_op = {
+    // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
+    //      `Feature`, `Filter`, `Bias`, etc.
+    static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
+    static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
+    static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
+    static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
+
+    // We don't perform constant folding for Push
+    static_cast<uint32_t>(loco::CanonicalOpcode::Push),
+
+    // TensorBroadcast is a good hint for optimization
+    // TODO Let this option be controlled by driver using logo
+    static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
+  };
+
+  if (node->dialect() == loco::CanonicalDialect::get())
+  {
+    if (skip_op.find(node->opnum()) != skip_op.end())
+      return true;
+  }
+
+  return false;
+}
+
+/// @brief Checks if a node is a target of constant folding transform
+bool foldable(const loco::Node *node)
+{
+  if (node->dialect() == loco::CanonicalDialect::get())
+  {
+    if (skip(node))
+      return false;
+
+    if (node->arity() == 0) // e.g., when a node is e.g, ConstGen or Pull
+      return false;
+
+    // When all args are ConstGen, let's do Constant Folding Transforms
+    for (int i = 0; i < node->arity(); i++)
+    {
+      if (node->arg(i)->opnum() != static_cast<uint32_t>(loco::CanonicalOpcode::ConstGen))
+        return false;
+    }
+
+    return true;
+  }
+  else
+  {
+    return false;
+  }
+}
+
+void fold(loco::Graph *graph, loco::Node *node)
+{
+  assert(foldable(node)); // sanity check to find a mistake when this function is reused later
+
+  // calcluate foldable node
+  locomotiv::Session sess(graph, std::vector<loco::Node *>{node});
+  sess.infer();
+  auto data = sess.get_output(0);
+
+  assert(data != nullptr);
+
+  auto shape = data->shape();
+  auto dtype = data->dtype();
+
+  // build ConstGen
+  auto new_const = graph->nodes()->create<loco::ConstGen>();
+  {
+    new_const->dtype(dtype);
+
+    new_const->rank(shape->rank());
+    for (int d = 0; d < shape->rank(); d++)
+      new_const->dim(d) = shape->dim(d);
+
+    auto count = num_elements(*new_const);
+
+    if (dtype == loco::DataType::FLOAT32)
+    {
+      new_const->size<loco::DataType::FLOAT32>(count);
+
+      auto const_buf = data->as_f32_bufptr()->base();
+      for (int x = 0; x < count; x++)
+        new_const->at<loco::DataType::FLOAT32>(x) = const_buf[x];
+    }
+    else if (dtype == loco::DataType::S32)
+    {
+      new_const->size<loco::DataType::S32>(count);
+
+      auto const_buf = data->as_s32_bufptr()->base();
+      for (int x = 0; x < count; x++)
+        new_const->at<loco::DataType::S32>(x) = const_buf[x];
+    }
+  }
+
+  // replace node with new_const
+  loco::replace(node).with(new_const);
+}
+
+} // namespace
+
+namespace logo
+{
+
+bool ConstantFoldingPass::run(loco::Graph *graph)
+{
+  auto outputs = loco::output_nodes(graph);
+
+  bool changed = false;
+  for (auto node : loco::postorder_traversal(outputs))
+  {
+    if (foldable(node))
+    {
+      fold(graph, node);
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace logo
diff --git a/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo-ex/src/Passes/ConstantFoldingPass.test.cpp
new file mode 100644 (file)
index 0000000..ba571a7
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/ConstantFoldingPass.h>
+
+#include "TestHelper.h"
+
+#include <loco.h>
+
+#include <gtest/gtest.h>
+
+using namespace logo::test;
+
+TEST(ConstantFoldingTest, name)
+{
+  logo::ConstantFoldingPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST(ConstantFoldingTest, run_NEG)
+{
+  loco::Graph g;
+  logo::ConstantFoldingPass pass;
+
+  ASSERT_FALSE(pass.run(&g));
+}
+
+namespace
+{
+
+/*
+  test case:
+      ConstGen ---- Relu ---- Push
+   (-3.14, 3.14)      (0, 3.14)
+
+  after constant folding:
+                 ConstGen ------Push
+                      (0, 3.14)
+*/
+void create_net_const_relu(loco::Graph *graph)
+{
+  assert(graph);
+
+  auto const_node = graph->nodes()->create<loco::ConstGen>();
+  {
+    const_node->dtype(loco::DataType::FLOAT32);
+    const_node->rank(1);
+    const_node->dim(0) = 2;
+    const_node->size<loco::DataType::FLOAT32>(2);
+    const_node->at<loco::DataType::FLOAT32>(0) = -3.14f;
+    const_node->at<loco::DataType::FLOAT32>(1) = 3.14f;
+  }
+
+  auto relu_node = graph->nodes()->create<loco::ReLU>();
+  {
+    relu_node->input(const_node);
+  }
+
+  auto push_node = graph->nodes()->create<loco::Push>();
+  {
+    push_node->from(relu_node);
+  }
+
+  auto graph_output = graph->outputs()->create();
+  {
+    graph_output->name("output");
+    graph_output->dtype(loco::DataType::FLOAT32);
+    loco::link(graph_output, push_node);
+  }
+}
+
+} // namespace
+
+TEST(ConstantFolding, const_relu_to_const)
+{
+  auto graph = loco::make_graph();
+  create_net_const_relu(graph.get());
+
+  logo::ConstantFoldingPass pass;
+  while (pass.run(graph.get()) == true)
+  {
+    ;
+  }
+
+  auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
+  auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
+  ASSERT_NE(const_gen, nullptr);
+
+  ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 2);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0); // result of relu(-3.14)
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 3.14f);
+}
+
+namespace
+{
+
+/*
+  test case:
+        ConstGen ---- Relu ---+
+        (-1, 1)        (0, 1) |
+                  ConstGen ---+-- ConcatV2 ----- Push
+                  (2, 3)      |       (0, 1, 2, 3)
+                   axis(0) ---+
+
+  after constant folding:
+                                  ConstGen ----- Push
+                                  (0, 1, 2, 3)
+*/
+void create_net_const_relu_concat(loco::Graph *graph)
+{
+  assert(graph);
+
+  auto const_1_node = graph->nodes()->create<loco::ConstGen>();
+  {
+    const_1_node->dtype(loco::DataType::FLOAT32);
+    const_1_node->rank(1);
+    const_1_node->dim(0) = 2;
+    const_1_node->size<loco::DataType::FLOAT32>(2);
+    const_1_node->at<loco::DataType::FLOAT32>(0) = -1.0f;
+    const_1_node->at<loco::DataType::FLOAT32>(1) = 1.0f;
+  }
+
+  auto relu_node = graph->nodes()->create<loco::ReLU>();
+  {
+    relu_node->input(const_1_node);
+  }
+
+  auto const_2_node = graph->nodes()->create<loco::ConstGen>();
+  {
+    const_2_node->dtype(loco::DataType::FLOAT32);
+    const_2_node->rank(1);
+    const_2_node->dim(0) = 2;
+    const_2_node->size<loco::DataType::FLOAT32>(2);
+    const_2_node->at<loco::DataType::FLOAT32>(0) = 2.0f;
+    const_2_node->at<loco::DataType::FLOAT32>(1) = 3.0f;
+  }
+
+  auto concat_node = graph->nodes()->create<loco::TensorConcat>();
+  {
+    concat_node->lhs(relu_node);
+    concat_node->rhs(const_2_node);
+    concat_node->axis(0);
+  }
+
+  auto push_node = graph->nodes()->create<loco::Push>();
+  {
+    push_node->from(concat_node);
+  }
+
+  auto graph_output = graph->outputs()->create();
+  {
+    graph_output->name("output");
+    graph_output->dtype(loco::DataType::FLOAT32);
+    loco::link(graph_output, push_node);
+  }
+}
+
+} // namespace
+
+TEST(ConstantFolding, const_relu_to_concat)
+{
+  auto graph = loco::make_graph();
+  create_net_const_relu_concat(graph.get());
+
+  logo::ConstantFoldingPass pass;
+  while (pass.run(graph.get()) == true)
+  {
+    ;
+  }
+
+  auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
+  auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
+  ASSERT_NE(const_gen, nullptr);
+
+  ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 4);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 1);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(2), 2);
+  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(3), 3);
+}
diff --git a/compiler/logo-ex/src/TestHelper.h b/compiler/logo-ex/src/TestHelper.h
new file mode 100644 (file)
index 0000000..07e3b20
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TEST_HELPER_H__
+#define __TEST_HELPER_H__
+
+#include <loco.h>
+
+namespace logo
+{
+namespace test
+{
+
+template <typename T> T *find_first_node_by_type(loco::Graph *g)
+{
+  T *first_node = nullptr;
+
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+  {
+    first_node = dynamic_cast<T *>(node);
+    if (first_node != nullptr)
+      break;
+  }
+
+  return first_node;
+}
+
+} // namespace test
+} // namespace logo
+
+#endif // __TEST_HELPER_H__
index a8efd9b0342755733bf47f25951c93c921bd78de..e6a6f907f18938033736fc700dc736ea9330f4ec 100644 (file)
@@ -3,12 +3,13 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(logo STATIC ${SOURCES})
-set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(logo PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(logo PRIVATE src)
 target_include_directories(logo PUBLIC include)
 target_link_libraries(logo PUBLIC loco)
 target_link_libraries(logo PUBLIC logo_core)
-target_link_libraries(logo PRIVATE locomotiv)
 
 if(NOT ENABLE_TEST)
   return()
diff --git a/compiler/logo/include/logo/ConstantFoldingPass.h b/compiler/logo/include/logo/ConstantFoldingPass.h
deleted file mode 100644 (file)
index 99ccdc3..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LOGO_CONSTANT_FOLDING_PASS_H__
-#define __LOGO_CONSTANT_FOLDING_PASS_H__
-
-#include <logo/Pass.h>
-
-#include <loco.h>
-
-namespace logo
-{
-
-/**
- * @brief  Performs constant folding optimization
- */
-class ConstantFoldingPass : public Pass
-{
-public:
-  const char *name(void) const final { return "ConstantFoldingPass"; }
-
-public:
-  bool run(loco::Graph *graph) override;
-};
-
-} // namespace logo
-
-#endif // __LOGO_CONSTANT_FOLDING_PASS_H__
index 636251e45b223a49d29ca21603a25c1006b7a443..06fd3212b13851673f2aa0c0f82a71b69d31fccd 100644 (file)
@@ -19,7 +19,6 @@
 
 // Please keep this in alphabetical order
 
-#include <logo/ConstantFoldingPass.h>
 #include <logo/RemoveDeadNodePass.h>
 #include <logo/RemoveForwardNodePass.h>
 #include <logo/ReorderDecodePass.h>
index c761833535f4865670e125de279d0b5f18adbd49..3e4d227cda781df332e8c384a8c3b98a94c37145 100644 (file)
@@ -1,3 +1,2 @@
 require("loco")
 require("logo-core")
-require("locomotiv")
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.cpp b/compiler/logo/src/Passes/ConstantFoldingPass.cpp
deleted file mode 100644 (file)
index 2bd4759..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <logo/ConstantFoldingPass.h>
-
-#include <loco.h>
-#include <loco/IR/CanonicalDialect.h>
-
-#include <locomotiv/Session.h>
-
-#include <cassert>
-#include <stdexcept>
-
-namespace
-{
-
-uint64_t num_elements(const loco::NodeMixin<loco::NodeTrait::TensorShape> &shape)
-{
-  if (shape.rank() == 0)
-  {
-    return 0;
-  }
-
-  uint64_t res = 1;
-
-  for (uint32_t axis = 0; axis < shape.rank(); ++axis)
-  {
-    assert(shape.dim(axis).known());
-    res *= shape.dim(axis).value();
-  }
-
-  return res;
-}
-
-/// @brief For some op, constant folding should not be performed. This returns true if node is such
-/// op.
-bool skip(const loco::Node *node)
-{
-  static std::set<uint32_t> skip_op = {
-    // TODO Current implementation works for 'Tensor' domain only. Support other domains such as
-    //      `Feature`, `Filter`, `Bias`, etc.
-    static_cast<uint32_t>(loco::CanonicalOpcode::FilterEncode),
-    static_cast<uint32_t>(loco::CanonicalOpcode::FeatureEncode),
-    static_cast<uint32_t>(loco::CanonicalOpcode::BiasEncode),
-    static_cast<uint32_t>(loco::CanonicalOpcode::DepthwiseFilterEncode),
-
-    // We don't perform constant folding for Push
-    static_cast<uint32_t>(loco::CanonicalOpcode::Push),
-
-    // TensorBroadcast is a good hint for optimization
-    // TODO Let this option be controlled by driver using logo
-    static_cast<uint32_t>(loco::CanonicalOpcode::TensorBroadcast),
-  };
-
-  if (node->dialect() == loco::CanonicalDialect::get())
-  {
-    if (skip_op.find(node->opnum()) != skip_op.end())
-      return true;
-  }
-
-  return false;
-}
-
-/// @brief Checks if a node is a target of constant folding transform
-bool foldable(const loco::Node *node)
-{
-  if (node->dialect() == loco::CanonicalDialect::get())
-  {
-    if (skip(node))
-      return false;
-
-    if (node->arity() == 0) // e.g., when a node is e.g, ConstGen or Pull
-      return false;
-
-    // When all args are ConstGen, let's do Constant Folding Transforms
-    for (int i = 0; i < node->arity(); i++)
-    {
-      if (node->arg(i)->opnum() != static_cast<uint32_t>(loco::CanonicalOpcode::ConstGen))
-        return false;
-    }
-
-    return true;
-  }
-  else
-  {
-    return false;
-  }
-}
-
-void fold(loco::Graph *graph, loco::Node *node)
-{
-  assert(foldable(node)); // sanity check to find a mistake when this function is reused later
-
-  // calcluate foldable node
-  locomotiv::Session sess(graph, std::vector<loco::Node *>{node});
-  sess.infer();
-  auto data = sess.get_output(0);
-
-  assert(data != nullptr);
-
-  auto shape = data->shape();
-  auto dtype = data->dtype();
-
-  // build ConstGen
-  auto new_const = graph->nodes()->create<loco::ConstGen>();
-  {
-    new_const->dtype(dtype);
-
-    new_const->rank(shape->rank());
-    for (int d = 0; d < shape->rank(); d++)
-      new_const->dim(d) = shape->dim(d);
-
-    auto count = num_elements(*new_const);
-
-    if (dtype == loco::DataType::FLOAT32)
-    {
-      new_const->size<loco::DataType::FLOAT32>(count);
-
-      auto const_buf = data->as_f32_bufptr()->base();
-      for (int x = 0; x < count; x++)
-        new_const->at<loco::DataType::FLOAT32>(x) = const_buf[x];
-    }
-    else if (dtype == loco::DataType::S32)
-    {
-      new_const->size<loco::DataType::S32>(count);
-
-      auto const_buf = data->as_s32_bufptr()->base();
-      for (int x = 0; x < count; x++)
-        new_const->at<loco::DataType::S32>(x) = const_buf[x];
-    }
-  }
-
-  // replace node with new_const
-  loco::replace(node).with(new_const);
-}
-
-} // namespace
-
-namespace logo
-{
-
-bool ConstantFoldingPass::run(loco::Graph *graph)
-{
-  auto outputs = loco::output_nodes(graph);
-
-  bool changed = false;
-  for (auto node : loco::postorder_traversal(outputs))
-  {
-    if (foldable(node))
-    {
-      fold(graph, node);
-      changed = true;
-    }
-  }
-
-  return changed;
-}
-
-} // namespace logo
diff --git a/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp b/compiler/logo/src/Passes/ConstantFoldingPass.test.cpp
deleted file mode 100644 (file)
index 5d222eb..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <logo/ConstantFoldingPass.h>
-
-#include "TestHelper.h"
-
-#include <loco.h>
-
-#include <gtest/gtest.h>
-
-using namespace logo::test;
-
-TEST(ConstantFoldingTest, name)
-{
-  logo::ConstantFoldingPass pass;
-  auto const name = pass.name();
-  ASSERT_NE(nullptr, name);
-}
-
-TEST(ConstantFoldingTest, run_NEG)
-{
-  loco::Graph g;
-  logo::ConstantFoldingPass pass;
-
-  ASSERT_FALSE(pass.run(&g));
-}
-
-namespace
-{
-
-/*
-  test case:
-      ConstGen ---- Relu ---- Push
-   (-3.14, 3.14)      (0, 3.14)
-
-  after constant folding:
-                 ConstGen ------Push
-                      (0, 3.14)
-*/
-void create_net_const_relu(loco::Graph *graph)
-{
-  assert(graph);
-
-  auto const_node = graph->nodes()->create<loco::ConstGen>();
-  {
-    const_node->dtype(loco::DataType::FLOAT32);
-    const_node->rank(1);
-    const_node->dim(0) = 2;
-    const_node->size<loco::DataType::FLOAT32>(2);
-    const_node->at<loco::DataType::FLOAT32>(0) = -3.14f;
-    const_node->at<loco::DataType::FLOAT32>(1) = 3.14f;
-  }
-
-  auto relu_node = graph->nodes()->create<loco::ReLU>();
-  {
-    relu_node->input(const_node);
-  }
-
-  auto push_node = graph->nodes()->create<loco::Push>();
-  {
-    push_node->from(relu_node);
-  }
-
-  auto graph_output = graph->outputs()->create();
-  {
-    graph_output->name("output");
-    graph_output->dtype(loco::DataType::FLOAT32);
-    loco::link(graph_output, push_node);
-  }
-}
-
-} // namespace
-
-TEST(ConstantFolding, const_relu_to_const)
-{
-  auto graph = loco::make_graph();
-  create_net_const_relu(graph.get());
-
-  logo::ConstantFoldingPass pass;
-  while (pass.run(graph.get()) == true)
-  {
-    ;
-  }
-
-  auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
-  auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
-  ASSERT_NE(const_gen, nullptr);
-
-  ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 2);
-  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0); // result of relu(-3.14)
-  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 3.14f);
-}
-
-namespace
-{
-
-/*
-  test case:
-        ConstGen ---- Relu ---+
-        (-1, 1)        (0, 1) |
-                  ConstGen ---+-- ConcatV2 ----- Push
-                  (2, 3)      |       (0, 1, 2, 3)
-                   axis(0) ---+
-
-  after constant folding:
-                                  ConstGen ----- Push
-                                  (0, 1, 2, 3)
-*/
-void create_net_const_relu_concat(loco::Graph *graph)
-{
-  assert(graph);
-
-  auto const_1_node = graph->nodes()->create<loco::ConstGen>();
-  {
-    const_1_node->dtype(loco::DataType::FLOAT32);
-    const_1_node->rank(1);
-    const_1_node->dim(0) = 2;
-    const_1_node->size<loco::DataType::FLOAT32>(2);
-    const_1_node->at<loco::DataType::FLOAT32>(0) = -1.0f;
-    const_1_node->at<loco::DataType::FLOAT32>(1) = 1.0f;
-  }
-
-  auto relu_node = graph->nodes()->create<loco::ReLU>();
-  {
-    relu_node->input(const_1_node);
-  }
-
-  auto const_2_node = graph->nodes()->create<loco::ConstGen>();
-  {
-    const_2_node->dtype(loco::DataType::FLOAT32);
-    const_2_node->rank(1);
-    const_2_node->dim(0) = 2;
-    const_2_node->size<loco::DataType::FLOAT32>(2);
-    const_2_node->at<loco::DataType::FLOAT32>(0) = 2.0f;
-    const_2_node->at<loco::DataType::FLOAT32>(1) = 3.0f;
-  }
-
-  auto concat_node = graph->nodes()->create<loco::TensorConcat>();
-  {
-    concat_node->lhs(relu_node);
-    concat_node->rhs(const_2_node);
-    concat_node->axis(0);
-  }
-
-  auto push_node = graph->nodes()->create<loco::Push>();
-  {
-    push_node->from(concat_node);
-  }
-
-  auto graph_output = graph->outputs()->create();
-  {
-    graph_output->name("output");
-    graph_output->dtype(loco::DataType::FLOAT32);
-    loco::link(graph_output, push_node);
-  }
-}
-
-} // namespace
-
-TEST(ConstantFolding, const_relu_to_concat)
-{
-  auto graph = loco::make_graph();
-  create_net_const_relu_concat(graph.get());
-
-  logo::ConstantFoldingPass pass;
-  while (pass.run(graph.get()) == true)
-  {
-    ;
-  }
-
-  auto push = logo::test::find_first_node_by_type<loco::Push>(graph.get());
-  auto const_gen = loco::must_cast<loco::ConstGen *>(push->from());
-  ASSERT_NE(const_gen, nullptr);
-
-  ASSERT_EQ(const_gen->size<loco::DataType::FLOAT32>(), 4);
-  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(0), 0);
-  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(1), 1);
-  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(2), 2);
-  ASSERT_EQ(const_gen->at<loco::DataType::FLOAT32>(3), 3);
-}
index 4a9a34e6debb58e21ff698f8eb0eba5c77b9a089..77ec5c81c45ffba13ec512e625287bfb110572a8 100644 (file)
@@ -111,7 +111,7 @@ Note that one memory manager could be shared between multiple interpreter instan
 
 List of predefined memory managers:
 - `SimpleMemoryManager` This is a simple wrapper around new/delete, default one.
-- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager desctuctor, used in kernel unit tests.
+- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests.
 - `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete.
 - `StaticMemoryManger` Uses precomputed memory allocation plan. Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs).
 
diff --git a/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644 (file)
index 0000000..375b1ae
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns GraphBuilderSource, which allows to not copy constant buffers from
+ * model's file.
+ *
+ * @warning Use this source only in case when model's buffer alive longer than Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
index 7dee8a7f2dec6dab7d319d33e033f29303069d8f..8e2f457a5e1bad08959622f9d543f9405186b17f 100644 (file)
@@ -50,7 +50,9 @@ public:
 class Interpreter
 {
 public:
-  explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr);
+  explicit Interpreter(const luci::Module *module);
+
+  explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
 
   ~Interpreter();
 
@@ -69,7 +71,6 @@ private:
   // the order of deletion in the destructor
   std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
   std::unique_ptr<class RuntimeModule> _runtime_module;
-  IMemoryManager *_memory_manager = nullptr;
 
   // Observer functionality support.
   std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
index 771974afe297b7dd21f2817cd11c2936bd9f03af..d134a6b95d4f8a37675e76cd04b2a1e1b3827386 100644 (file)
@@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation)
 REGISTER_KERNEL(Conv2D)
 REGISTER_KERNEL(DepthToSpace)
 REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
 REGISTER_KERNEL(Div)
 REGISTER_KERNEL(Elu)
 REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
 REGISTER_KERNEL(Floor)
 REGISTER_KERNEL(FloorDiv)
 REGISTER_KERNEL(Equal)
@@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual)
 REGISTER_KERNEL(Pad)
 REGISTER_KERNEL(PadV2)
 REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
 REGISTER_KERNEL(Reshape)
 REGISTER_KERNEL(ResizeBilinear)
 REGISTER_KERNEL(ResizeNearestNeighbor)
@@ -50,6 +53,7 @@ REGISTER_KERNEL(Square)
 REGISTER_KERNEL(SquaredDifference)
 REGISTER_KERNEL(Squeeze)
 REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
 REGISTER_KERNEL(Tanh)
 REGISTER_KERNEL(Transpose)
 REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644 (file)
index 0000000..a274afb
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+                               const tflite::RuntimeShape &input_shape, const T *input_data,
+                               const tflite::RuntimeShape &output_shape, T *output_data,
+                               const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "AveragePool NYI");
+    (void)params;
+    (void)input_shape;
+    (void)input_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+                                const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                                const tflite::RuntimeShape &output_shape, int8_t *output_data,
+                                const tflite::RuntimeShape &scratchpad_shape,
+                                int8_t *scratchpad_data)
+{
+  assert(input_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+  assert(scratchpad_data != nullptr);
+
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  assert(batches == 1);
+
+  const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = 1;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = 1;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = depth;
+
+  cmsis_nn_pool_params pool_params;
+  pool_params.stride.h = params.stride_height;
+  pool_params.stride.w = params.stride_width;
+  pool_params.padding.h = params.padding_values.height;
+  pool_params.padding.w = params.padding_values.width;
+  pool_params.activation.min = params.quantized_activation_min;
+  pool_params.activation.max = params.quantized_activation_max;
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = 1;
+  filter_dims.h = params.filter_height;
+  filter_dims.w = params.filter_width;
+  filter_dims.c = 1;
+
+  cmsis_nn_context ctx;
+  ctx.buf = scratchpad_data;
+  ctx.size = scratchpad_shape.Dims(0);
+  auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+                            output_data);
+  assert(res == ARM_MATH_SUCCESS);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  if (input_data_type == luci_interpreter::DataType::S8)
+  {
+    assert(input_shape.DimensionsCount() == 4);
+    assert(output_shape.DimensionsCount() == 4);
+
+    const int32_t output_width = output_shape.Dims(2);
+    const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+    const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
+    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+    luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
index 0a8ae4e48d10c681256afb4bbe6c8d5687f6413b..cfb84ea6078832523e01186324259de9df52c822 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <tensorflow/lite/kernels/internal/reference/conv.h>
 #include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
 
 namespace luci_interpreter_pal
 {
@@ -26,11 +28,11 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
                         const float *input_data, const tflite::RuntimeShape &filter_shape,
                         const float *filter_data, const tflite::RuntimeShape &bias_shape,
                         const float *bias_data, const tflite::RuntimeShape &output_shape,
-                        float *output_data, const tflite::RuntimeShape &im2col_shape,
-                        float *im2col_data)
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
   tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
                               bias_shape, bias_data, output_shape, output_data,
                               tflite::RuntimeShape(), nullptr);
@@ -40,14 +42,14 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
                         const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
                         const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
                         const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                        uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
-                        uint8 *im2col_data)
+                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8 *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
   tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data, im2col_shape,
-                              im2col_data, nullptr);
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, nullptr);
 }
 
 static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -55,14 +57,141 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
                                   const int8 *input_data, const tflite::RuntimeShape &filter_shape,
                                   const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
                                   const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                                  int8 *output_data, const tflite::RuntimeShape &im2col_shape,
-                                  int8 *im2col_data)
+                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                                  int8 *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
-  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
-                                                filter_shape, filter_data, bias_shape, bias_data,
-                                                output_shape, output_data);
+  if (scratchpad_data)
+  {
+    cmsis_nn_conv_params conv_params;
+    conv_params.dilation.h = params.dilation_height_factor;
+    conv_params.dilation.w = params.dilation_width_factor;
+
+    assert(conv_params.dilation.h == 1);
+    assert(conv_params.dilation.w == 1);
+
+    conv_params.input_offset = params.input_offset;
+    conv_params.output_offset = params.output_offset;
+    conv_params.stride.h = params.stride_height;
+    conv_params.stride.w = params.stride_width;
+    conv_params.padding.h = params.padding_values.height;
+    conv_params.padding.w = params.padding_values.width;
+    conv_params.activation.min = params.quantized_activation_min;
+    conv_params.activation.max = params.quantized_activation_max;
+
+    cmsis_nn_per_channel_quant_params quant_params;
+    quant_params.multiplier = const_cast<int32_t *>(mult);
+    quant_params.shift = const_cast<int32_t *>(shifts);
+
+    assert(conv_params.activation.min <= conv_params.activation.max);
+    assert(input_shape.DimensionsCount() == 4);
+    assert(filter_shape.DimensionsCount() == 4);
+    assert(output_shape.DimensionsCount() == 4);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_depth;
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = output_depth;
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = input_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+                                       &filter_dims, filter_data, &bias_dims, bias_data,
+                                       &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+  }
+  else
+  {
+    tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                  filter_shape, filter_data, bias_shape, bias_data,
+                                                  output_shape, output_data);
+  }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  cmsis_nn_conv_params conv_params;
+  conv_params.dilation.h = params.dilation_height_factor;
+  conv_params.dilation.w = params.dilation_width_factor;
+
+  if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
+      conv_params.dilation.w == 1)
+  {
+    const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+    const int32_t filter_height = filter_shape.Dims(1);
+    const int32_t filter_width = filter_shape.Dims(2);
+    const int32_t output_height = output_shape.Dims(1);
+    const int32_t output_width = output_shape.Dims(2);
+
+    conv_params.input_offset = params.input_offset;
+    conv_params.output_offset = params.output_offset;
+    conv_params.stride.h = params.stride_height;
+    conv_params.stride.w = params.stride_width;
+    conv_params.padding.h = params.padding_values.height;
+    conv_params.padding.w = params.padding_values.width;
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batches;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_depth;
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = output_depth;
+    filter_dims.h = filter_height;
+    filter_dims.w = filter_width;
+    filter_dims.c = input_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batches;
+    output_dims.h = output_height;
+    output_dims.w = output_width;
+    output_dims.c = output_depth;
+
+    const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+                                                                     &filter_dims, &output_dims);
+
+    luci_interpreter::Shape scratchpad_shape{buf_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
 }
 
 } // namespace luci_interpreter_pal
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644 (file)
index 0000000..120dcd8
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "DepthwiseConvPerChannel NYI");
+    (void)params;
+    (void)output_multiplier;
+    (void)output_shift;
+    (void)input_shape;
+    (void)output_data;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  if (scratchpad_data)
+  {
+    cmsis_nn_dw_conv_params dw_conv_params;
+    dw_conv_params.dilation.h = params.dilation_height_factor;
+    dw_conv_params.dilation.w = params.dilation_width_factor;
+    assert(dw_conv_params.dilation.h == 1);
+    assert(dw_conv_params.dilation.w == 1);
+
+    dw_conv_params.input_offset = params.input_offset;
+    dw_conv_params.output_offset = params.output_offset;
+    dw_conv_params.stride.h = params.stride_height;
+    dw_conv_params.stride.w = params.stride_width;
+    dw_conv_params.padding.h = params.padding_values.height;
+    dw_conv_params.padding.w = params.padding_values.width;
+
+    dw_conv_params.activation.min = params.quantized_activation_min;
+    dw_conv_params.activation.max = params.quantized_activation_max;
+    dw_conv_params.ch_mult = params.depth_multiplier;
+
+    cmsis_nn_per_channel_quant_params quant_params;
+    int32_t output_multiplier = params.output_multiplier;
+    int32_t output_shift = params.output_shift;
+
+    quant_params.multiplier = &output_multiplier;
+    quant_params.shift = &output_shift;
+
+    assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+                                             input_data, &filter_dims, filter_data, &bias_dims,
+                                             bias_data, &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+  }
+  else
+  {
+    tflite::reference_integer_ops::DepthwiseConvPerChannel(
+      params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+      bias_shape, bias_data, output_shape, output_data);
+  }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  cmsis_nn_dw_conv_params dw_conv_params;
+  dw_conv_params.dilation.h = params.dilation_height_factor;
+  dw_conv_params.dilation.w = params.dilation_width_factor;
+
+  if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+      dw_conv_params.dilation.w == 1)
+  {
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+      &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+    luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644 (file)
index 0000000..15ff032
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+                                               output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644 (file)
index 0000000..32e9057
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+                                  const tflite::RuntimeShape &input_shape, const T *input_data,
+                                  const tflite::RuntimeShape &filter_shape, const T *filter_data,
+                                  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                                  const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  {
+    // MARK: At this moment this operation doesn't support
+    assert(false && "FullyConnected NYI");
+    (void)params;
+    (void)input_shape;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+  }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+                       const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                       const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                       const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                       const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+  assert(output_shape.DimensionsCount() == 2);
+
+  const int batches = output_shape.Dims(0);
+  const int output_depth = output_shape.Dims(1);
+
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+  cmsis_nn_fc_params fc_params;
+  fc_params.input_offset = params.input_offset;
+  fc_params.output_offset = params.output_offset;
+  fc_params.filter_offset = params.weights_offset;
+  fc_params.activation.min = params.quantized_activation_min;
+  fc_params.activation.max = params.quantized_activation_max;
+
+  cmsis_nn_per_tensor_quant_params quant_params;
+  quant_params.multiplier = params.output_multiplier;
+  quant_params.shift = params.output_shift;
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = batches;
+  input_dims.h = 1;
+  input_dims.w = 1;
+  input_dims.c = accum_depth;
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = accum_depth;
+  filter_dims.h = 1;
+  filter_dims.w = 1;
+  filter_dims.c = output_depth;
+
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = 1;
+  bias_dims.h = 1;
+  bias_dims.w = 1;
+  bias_dims.c = output_depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = batches;
+  output_dims.h = 1;
+  output_dims.w = 1;
+  output_dims.c = output_depth;
+
+  int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+  auto buffer = std::make_unique<int8_t[]>(buf_size);
+  assert(buffer != nullptr);
+
+  cmsis_nn_context ctx;
+  ctx.buf = buffer.get();
+  ctx.size = buf_size;
+
+  auto res =
+    arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+                           filter_data, &bias_dims, bias_data, &output_dims, output_data);
+  assert(res == ARM_MATH_SUCCESS);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
index 2b46b100cf35a121ac2873805676890665b0e8ff..347a97a831ddab63b496368826fb62043eb0f168 100644 (file)
 
 namespace luci_interpreter_pal
 {
+template <typename T>
 static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
-                       const float *input1_data, const tflite::RuntimeShape &input2_shape,
-                       const float *input2_data, const tflite::RuntimeShape &output_shape,
-                       float *output_data)
+                       const T *input1_data, const tflite::RuntimeShape &input2_shape,
+                       const T *input2_data, const tflite::RuntimeShape &output_shape,
+                       T *output_data)
 {
   tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
                                             input2_data, output_shape, output_data);
 }
 
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
-                                      const tflite::RuntimeShape &input1_shape,
-                                      const float *input1_data,
-                                      const tflite::RuntimeShape &input2_shape,
-                                      const float *input2_data,
-                                      const tflite::RuntimeShape &output_shape, float *output_data)
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                   const T *input1_data, const tflite::RuntimeShape &input2_shape,
+                   const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
 {
   tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
                                             input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644 (file)
index 0000000..6046789
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+                                    effective_scale_shift, input_zero_point, output_zero_point,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644 (file)
index 0000000..a4a5b2a
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = input_shape.Dims(0);
+  input_dims.h = input_shape.Dims(1);
+
+  cmsis_nn_dims weights_feature_dims;
+  weights_feature_dims.n = weight_feature_shape.Dims(0);
+  weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+  cmsis_nn_dims weights_time_dims;
+  weights_time_dims.n = weight_time_shape.Dims(0);
+  weights_time_dims.h = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = bias_shape.Dims(0);
+
+  cmsis_nn_dims state_dims;
+  state_dims.n = batch_size;
+  state_dims.h = memory_size * num_filters;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = output_shape.Dims(0);
+  output_dims.h = output_shape.Dims(1);
+
+  cmsis_nn_svdf_params svdf_params;
+  svdf_params.rank = params.rank;
+  svdf_params.input_offset = input_zp;
+  svdf_params.output_offset = output_zp;
+
+  svdf_params.input_activation.min = INT16_MIN;
+  svdf_params.input_activation.max = INT16_MAX;
+
+  svdf_params.output_activation.min = INT8_MIN;
+  svdf_params.output_activation.max = INT8_MAX;
+
+  cmsis_nn_per_tensor_quant_params in_quant_params;
+  in_quant_params.multiplier = scale_1_a;
+  in_quant_params.shift = scale_1_b;
+
+  cmsis_nn_per_tensor_quant_params out_quant_params;
+  out_quant_params.multiplier = scale_2_a;
+  out_quant_params.shift = scale_2_b;
+
+  cmsis_nn_context scratch_ctx;
+  scratch_ctx.buf = scratchpad_data;
+
+  cmsis_nn_context scratch_output_ctx;
+  scratch_output_ctx.buf = output_temp_data;
+
+  arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+              &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+              weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+              &output_dims, output_data);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    float *new_state_start = activation_state_data;
+    const float *old_state_start = activation_state_data + 1;
+    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    const float *matrix = weight_feature_data;
+    const float *vector = input_data;
+    float *result = &activation_state_data[memory_size - 1];
+    float *result_in_batch = result;
+    for (int i = 0; i < batch_size; ++i)
+    {
+      const float *matrix_ptr = matrix;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = vector + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  if (input_data_type == loco::DataType::FLOAT32 &&
+      (weight_feature_data_type == loco::DataType::S8 ||
+       weight_feature_data_type == loco::DataType::U8))
+  {
+    (void)input_shape;
+    (void)weight_time_shape;
+    (void)scratchpad_3;
+    (void)scratchpad_4;
+    (void)scratchpad_5;
+    (void)scratchpad_6;
+
+    throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == loco::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
index 9a25a3c5dbda85a871979079902089e6813ee97e..a68b363d9f967c489d4a79405648d67d894b5ab3 100644 (file)
@@ -42,9 +42,12 @@ macro(add_pal_to_target TGT)
             "${TensorFlowSource_DIR}")
     target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
 
-    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+    file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+    list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
     add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
-    set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+    set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
     target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
             "${TensorFlowRuySource_DIR}"
             "${TensorFlowGEMMLowpSource_DIR}"
@@ -53,7 +56,7 @@ macro(add_pal_to_target TGT)
     )
 
     add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
-    target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+    target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
             "${CMSISSource_DIR}/CMSIS/NN/Include"
             "${CMSISSource_DIR}/CMSIS/DSP/Include"
             "${CMSISSource_DIR}/CMSIS/Core/Include")
index 9d541276ca056e449ba840d23aca9265aba1d159..428b15ee0be8a87c4b45ed770c548260888c0ff5 100644 (file)
@@ -1,19 +1,23 @@
 REGISTER_KERNEL(Add)
 REGISTER_KERNEL(ArgMax)
 REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
 REGISTER_KERNEL(BatchToSpaceND)
 REGISTER_KERNEL(Cast)
 REGISTER_KERNEL(Concatenation)
 REGISTER_KERNEL(Conv2D)
 REGISTER_KERNEL(DepthToSpace)
 REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
 REGISTER_KERNEL(Div)
 REGISTER_KERNEL(Elu)
 REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
 REGISTER_KERNEL(Floor)
 REGISTER_KERNEL(FloorDiv)
 REGISTER_KERNEL(Equal)
 REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
 REGISTER_KERNEL(Greater)
 REGISTER_KERNEL(GreaterEqual)
 REGISTER_KERNEL(If)
@@ -37,11 +41,13 @@ REGISTER_KERNEL(MirrorPad)
 REGISTER_KERNEL(Mul)
 REGISTER_KERNEL(Neg)
 REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
 REGISTER_KERNEL(Pack)
 REGISTER_KERNEL(Pad)
 REGISTER_KERNEL(PadV2)
 REGISTER_KERNEL(Pow)
 REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
 REGISTER_KERNEL(Relu)
 REGISTER_KERNEL(Relu6)
 REGISTER_KERNEL(Reshape)
@@ -61,6 +67,7 @@ REGISTER_KERNEL(Square)
 REGISTER_KERNEL(SquaredDifference)
 REGISTER_KERNEL(Squeeze)
 REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
 REGISTER_KERNEL(Tanh)
 REGISTER_KERNEL(Transpose)
 REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644 (file)
index 0000000..cce3060
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+                               const tflite::RuntimeShape &input_shape, const T *input_data,
+                               const tflite::RuntimeShape &output_shape, T *output_data,
+                               const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation doesn't support
+    assert(false && "AveragePool NYI");
+    (void)params;
+    (void)input_shape;
+    (void)input_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+                                const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                                const tflite::RuntimeShape &output_shape, int8_t *output_data,
+                                const tflite::RuntimeShape &scratchpad_shape,
+                                int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+
+  tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+                                             output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  (void)input_data_type;
+  (void)input_shape;
+  (void)output_shape;
+
+  scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644 (file)
index 0000000..3894f2d
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+                        const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+                        const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape,
+                                     output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+                                         luci_interpreter::Tensor *rhs_scratchpad,
+                                         const tflite::RuntimeShape &lhs_shape,
+                                         const tflite::RuntimeShape &rhs_shape)
+{
+  // Scratchpad for transposed LHS
+  {
+    auto lhs_rank = lhs_shape.DimensionsCount();
+    luci_interpreter::Shape scratchpad_size(lhs_rank);
+    for (int i = 0; i < lhs_rank - 2; ++i)
+    {
+      scratchpad_size.dim(i) = lhs_shape.Dims(i);
+    }
+    scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1);
+    scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2);
+
+    lhs_scratchpad->resize(scratchpad_size);
+  }
+  // Scratchpad for transposed RHS
+  {
+    auto rhs_rank = rhs_shape.DimensionsCount();
+    luci_interpreter::Shape scratchpad_size(rhs_rank);
+    for (int i = 0; i < rhs_rank - 2; ++i)
+    {
+      scratchpad_size.dim(i) = rhs_shape.Dims(i);
+    }
+    scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1);
+    scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2);
+
+    rhs_scratchpad->resize(scratchpad_size);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
index 2550dd5d71e240339cb1793ac8298e7265569991..985a15f396686ca5f1be10dd779f04a8f2304fb2 100644 (file)
@@ -26,14 +26,24 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
                         const float *input_data, const tflite::RuntimeShape &filter_shape,
                         const float *filter_data, const tflite::RuntimeShape &bias_shape,
                         const float *bias_data, const tflite::RuntimeShape &output_shape,
-                        float *output_data, const tflite::RuntimeShape &im2col_shape,
-                        float *im2col_data)
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
 {
-  if (im2col_data)
+  (void)scratchpad_shape;
+  if (scratchpad_data)
   {
+    const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int32_t output_height = output_shape.Dims(1);
+    const int32_t output_width = output_shape.Dims(2);
+    const int32_t filter_height = filter_shape.Dims(1);
+    const int32_t filter_width = filter_shape.Dims(2);
+    tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+                                      input_depth * filter_height * filter_width};
+
     tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
                                 bias_shape, bias_data, output_shape, output_data, im2col_shape,
-                                im2col_data);
+                                scratchpad_data);
   }
   else
     tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
@@ -45,8 +55,8 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
                         const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
                         const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
                         const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                        uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
-                        uint8 *im2col_data)
+                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8 *scratchpad_data)
 {
   // TODO This should only be done once (although it takes only a few microseconds).
   //  Also, the user should be able to adjust the number of threads.
@@ -54,8 +64,8 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
   gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
 
   tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data, im2col_shape,
-                              im2col_data, gemmlowp_context.get());
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, gemmlowp_context.get());
 }
 
 static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -63,17 +73,55 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
                                   const int8 *input_data, const tflite::RuntimeShape &filter_shape,
                                   const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
                                   const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                                  int8 *output_data, const tflite::RuntimeShape &im2col_shape,
-                                  int8 *im2col_data)
+                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                                  int8 *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
   // TODO enable optimized version
   tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
                                                 filter_shape, filter_data, bias_shape, bias_data,
                                                 output_shape, output_data);
 }
 
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  const int32_t filter_height = filter_shape.Dims(1);
+  const int32_t filter_width = filter_shape.Dims(2);
+
+  // Allocate tensor for scratchpad, if needed.
+  // The checks here should be aligned with the actual implementation.
+  const bool need_dilated_scratchpad =
+    params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+  const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 ||
+                                           filter_height != 1 || filter_width != 1;
+  auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 &&
+                          (need_dilated_scratchpad || need_non_dilated_scratchpad);
+
+  if (_need_scratchpad)
+  {
+    const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+    const int32_t output_height = output_shape.Dims(1);
+    const int32_t output_width = output_shape.Dims(2);
+
+    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+    int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height *
+                              filter_width * data_type_size;
+    luci_interpreter::Shape scratchpad_shape{scratchpad_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
+}
+
 } // namespace luci_interpreter_pal
 
 #endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644 (file)
index 0000000..c9d1a29
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "DepthwiseConvPerChannel NYI");
+    (void)params;
+    (void)output_multiplier;
+    (void)output_shift;
+    (void)input_shape;
+    (void)output_data;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_integer_ops::DepthwiseConvPerChannel(
+    params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+    bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  (void)params;
+  (void)input_data_type;
+  (void)input_shape;
+  (void)filter_shape;
+  (void)output_shape;
+
+  scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644 (file)
index 0000000..3af6d07
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644 (file)
index 0000000..62970db
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+                                  const tflite::RuntimeShape &input_shape, const T *input_data,
+                                  const tflite::RuntimeShape &filter_shape, const T *filter_data,
+                                  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                                  const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "FullyConnected NYI");
+    (void)params;
+    (void)input_shape;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+  }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+                       const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                       const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                       const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                       const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+  tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+                                                filter_data, bias_shape, bias_data, output_shape,
+                                                output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-interpreter/pal/linux/PALGather.h
new file mode 100644 (file)
index 0000000..49ac35f
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T, typename CoordsT = int32>
+static inline void Gather(const tflite::GatherParams &op_params,
+                          const tflite::RuntimeShape &input_shape, const T *input_data,
+                          const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+                          const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+                                output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
index cfaec1b58720f0ca9a182700e5fd471e86589773..a8a9d4abc8069f8c77a64574e9b7e22dd03dd185 100644 (file)
 
 namespace luci_interpreter_pal
 {
+template <typename T>
 static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
-                       const float *input1_data, const tflite::RuntimeShape &input2_shape,
-                       const float *input2_data, const tflite::RuntimeShape &output_shape,
-                       float *output_data)
+                       const T *input1_data, const tflite::RuntimeShape &input2_shape,
+                       const T *input2_data, const tflite::RuntimeShape &output_shape,
+                       T *output_data)
 {
   tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
                              output_shape, output_data);
 }
 
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
-                                      const tflite::RuntimeShape &input1_shape,
-                                      const float *input1_data,
-                                      const tflite::RuntimeShape &input2_shape,
-                                      const float *input2_data,
-                                      const tflite::RuntimeShape &output_shape, float *output_data)
+template <>
+inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                const int64_t *input1_data, const tflite::RuntimeShape &input2_shape,
+                const int64_t *input2_data, const tflite::RuntimeShape &output_shape,
+                int64_t *output_data)
+{
+  tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                                            input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                   const T *input1_data, const tflite::RuntimeShape &input2_shape,
+                   const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
 {
   tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
                                             input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644 (file)
index 0000000..bf1d795
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier,
+                                    effective_scale_shift, input_zero_point, output_zero_point,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644 (file)
index 0000000..0ffba14
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+                                         weight_feature_data, weight_time_shape, weight_time_data,
+                                         bias_shape, bias_data, activation_state_data, output_shape,
+                                         output_data, scratchpad_data, output_temp_data, scale_1_a,
+                                         scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+                                       weight_feature_data, weight_time_shape, weight_time_data,
+                                       bias_shape, bias_data, scratchpad_data,
+                                       activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+  if (input_data_type == loco::DataType::FLOAT32 &&
+      (weight_feature_data_type == loco::DataType::S8 ||
+       weight_feature_data_type == loco::DataType::U8))
+  {
+    (void)input_shape;
+    (void)weight_time_shape;
+    (void)scratchpad_3;
+    (void)scratchpad_4;
+    (void)scratchpad_5;
+    (void)scratchpad_6;
+
+    throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == loco::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
index 84349e0bfb1bdf59ccd89e41322cc3b0d9db0226..185700cf9a67daf39909d95ca9bf4ec86fd126c2 100644 (file)
@@ -40,7 +40,35 @@ macro(add_pal_to_target TGT)
 
     # TODO put it back, I changed my mind.
     # instead add sources with visitors in this library
-    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+    if(BUILD_ARM32_NEON)
+        # NOTE may need to revise this list for version upgrade
+        set(PAL_SOURCES ${PAL_SOURCES}
+                ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+                ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+                ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+                ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+                ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+                ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+                ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+                ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+                ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+                ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+                ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+                ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+                ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+                ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+                ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+                ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+                ${TensorFlowRuySource_DIR}/ruy/tune.cc
+                ${TensorFlowRuySource_DIR}/ruy/wait.cc
+                ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+                )
+    endif(BUILD_ARM32_NEON)
+
     add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
     set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
     target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
index 771974afe297b7dd21f2817cd11c2936bd9f03af..d134a6b95d4f8a37675e76cd04b2a1e1b3827386 100644 (file)
@@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation)
 REGISTER_KERNEL(Conv2D)
 REGISTER_KERNEL(DepthToSpace)
 REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
 REGISTER_KERNEL(Div)
 REGISTER_KERNEL(Elu)
 REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
 REGISTER_KERNEL(Floor)
 REGISTER_KERNEL(FloorDiv)
 REGISTER_KERNEL(Equal)
@@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual)
 REGISTER_KERNEL(Pad)
 REGISTER_KERNEL(PadV2)
 REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
 REGISTER_KERNEL(Reshape)
 REGISTER_KERNEL(ResizeBilinear)
 REGISTER_KERNEL(ResizeNearestNeighbor)
@@ -50,6 +53,7 @@ REGISTER_KERNEL(Square)
 REGISTER_KERNEL(SquaredDifference)
 REGISTER_KERNEL(Squeeze)
 REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
 REGISTER_KERNEL(Tanh)
 REGISTER_KERNEL(Transpose)
 REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644 (file)
index 0000000..cce3060
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+                               const tflite::RuntimeShape &input_shape, const T *input_data,
+                               const tflite::RuntimeShape &output_shape, T *output_data,
+                               const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation doesn't support
+    assert(false && "AveragePool NYI");
+    (void)params;
+    (void)input_shape;
+    (void)input_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+                                const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                                const tflite::RuntimeShape &output_shape, int8_t *output_data,
+                                const tflite::RuntimeShape &scratchpad_shape,
+                                int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+
+  tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+                                             output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  (void)input_data_type;
+  (void)input_shape;
+  (void)output_shape;
+
+  scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
index 0a8ae4e48d10c681256afb4bbe6c8d5687f6413b..13976877a9b3d2c34b2d33bb141a80e44d7b7616 100644 (file)
@@ -26,11 +26,11 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
                         const float *input_data, const tflite::RuntimeShape &filter_shape,
                         const float *filter_data, const tflite::RuntimeShape &bias_shape,
                         const float *bias_data, const tflite::RuntimeShape &output_shape,
-                        float *output_data, const tflite::RuntimeShape &im2col_shape,
-                        float *im2col_data)
+                        float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        float *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
   tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
                               bias_shape, bias_data, output_shape, output_data,
                               tflite::RuntimeShape(), nullptr);
@@ -40,14 +40,14 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
                         const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
                         const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
                         const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                        uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
-                        uint8 *im2col_data)
+                        uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        uint8 *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
   tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
-                              bias_shape, bias_data, output_shape, output_data, im2col_shape,
-                              im2col_data, nullptr);
+                              bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+                              scratchpad_data, nullptr);
 }
 
 static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -55,16 +55,31 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
                                   const int8 *input_data, const tflite::RuntimeShape &filter_shape,
                                   const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
                                   const int32 *bias_data, const tflite::RuntimeShape &output_shape,
-                                  int8 *output_data, const tflite::RuntimeShape &im2col_shape,
-                                  int8 *im2col_data)
+                                  int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                                  int8 *scratchpad_data)
 {
-  (void)im2col_shape;
-  (void)im2col_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
   tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
                                                 filter_shape, filter_data, bias_shape, bias_data,
                                                 output_shape, output_data);
 }
 
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  (void)input_data_type;
+  (void)params;
+  (void)input_shape;
+  (void)filter_shape;
+  (void)output_shape;
+  scratchpad->set_allocatable(false);
+}
+
 } // namespace luci_interpreter_pal
 
 #endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644 (file)
index 0000000..c9d1a29
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "DepthwiseConvPerChannel NYI");
+    (void)params;
+    (void)output_multiplier;
+    (void)output_shift;
+    (void)input_shape;
+    (void)output_data;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+    (void)scratchpad_shape;
+    (void)scratchpad_data;
+  }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+  tflite::reference_integer_ops::DepthwiseConvPerChannel(
+    params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+    bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  (void)params;
+  (void)input_data_type;
+  (void)input_shape;
+  (void)filter_shape;
+  (void)output_shape;
+
+  scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644 (file)
index 0000000..15ff032
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+                                               output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644 (file)
index 0000000..048624d
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+                                  const tflite::RuntimeShape &input_shape, const T *input_data,
+                                  const tflite::RuntimeShape &filter_shape, const T *filter_data,
+                                  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                                  const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  {
+    // MARK: At this moment this operation is not supported
+    assert(false && "FullyConnected NYI");
+    (void)params;
+    (void)input_shape;
+    (void)input_data;
+    (void)filter_shape;
+    (void)filter_data;
+    (void)bias_shape;
+    (void)bias_data;
+    (void)output_shape;
+    (void)output_data;
+  }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+                       const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                       const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                       const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                       const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+  tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+                                                filter_data, bias_shape, bias_data, output_shape,
+                                                output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
index 2b46b100cf35a121ac2873805676890665b0e8ff..347a97a831ddab63b496368826fb62043eb0f168 100644 (file)
 
 namespace luci_interpreter_pal
 {
+template <typename T>
 static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
-                       const float *input1_data, const tflite::RuntimeShape &input2_shape,
-                       const float *input2_data, const tflite::RuntimeShape &output_shape,
-                       float *output_data)
+                       const T *input1_data, const tflite::RuntimeShape &input2_shape,
+                       const T *input2_data, const tflite::RuntimeShape &output_shape,
+                       T *output_data)
 {
   tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
                                             input2_data, output_shape, output_data);
 }
 
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
-                                      const tflite::RuntimeShape &input1_shape,
-                                      const float *input1_data,
-                                      const tflite::RuntimeShape &input2_shape,
-                                      const float *input2_data,
-                                      const tflite::RuntimeShape &output_shape, float *output_data)
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                   const T *input1_data, const tflite::RuntimeShape &input2_shape,
+                   const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
 {
   tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
                                             input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644 (file)
index 0000000..6046789
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+                                    effective_scale_shift, input_zero_point, output_zero_point,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644 (file)
index 0000000..3bba668
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  const int n_rank = params.rank;
+  const int n_batch = input_shape.Dims(0);
+  const int n_input = input_shape.Dims(1);
+  const int n_filter = weight_feature_shape.Dims(0);
+  const int n_unit = n_filter / n_rank;
+  const int n_memory = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    int16_t *new_state_start = activation_state_data;
+    const int16_t *old_state_start = activation_state_data + 1;
+    const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Feature matmul.
+  {
+    const int32_t output_max = std::numeric_limits<int16_t>::max();
+    const int32_t output_min = std::numeric_limits<int16_t>::min();
+    int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+    for (int b = 0; b < n_batch; b++)
+    {
+      const int8_t *matrix_ptr = weight_feature_data;
+      for (int r = 0; r < n_filter; r++)
+      {
+        int32_t dot_prod = 0;
+        const int8_t *vector_in_batch = input_data + b * n_input;
+        for (int c = 0; c < n_input; c++)
+        {
+          dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+        }
+        dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+        dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+        // This assumes state is symmetrically quantized. Otherwise last bit of
+        // state should be initialized to its zero point and accumulate the
+        // dot_prod.
+        // Equivalent as the following:
+        //     result_in_batch = zero point, which happens to be zero.
+        //     result_in_batch += dot_prod_56.
+        *result_in_batch = dot_prod;
+        result_in_batch += n_memory;
+      }
+    }
+  }
+
+  // Time.
+  {
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // Perform batched vector dot product:
+      const int16_t *vector1_ptr = weight_time_data;
+      const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+      for (int i = 0; i < n_filter; i++)
+      {
+        *scratch_ptr_batch = 0;
+        for (int j = 0; j < n_memory; j++)
+        {
+          *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+        }
+        scratch_ptr_batch++;
+      }
+    }
+  }
+
+  // Reduce, add bias, rescale, activation.
+  {
+    // Add bias.
+    if (bias_data)
+    {
+      // Vector batch assign:
+      for (int i = 0; i < n_batch; ++i)
+      {
+        int32_t *output_ptr = output_temp_data + i * n_unit;
+        const int32_t *bias_ptr = bias_data;
+        for (int j = 0; j < n_unit; ++j)
+        {
+          *output_ptr++ = *bias_ptr++;
+        }
+      }
+    }
+    else
+    {
+      int32_t *output_ptr = output_temp_data;
+      for (int i = 0; i < n_batch * n_unit; ++i)
+      {
+        *output_ptr++ = 0;
+      }
+    }
+
+    // Reduce.
+    for (int b = 0; b < n_batch; ++b)
+    {
+      int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+      int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+      // Reduction sum vector
+      for (int i = 0; i < n_unit; ++i)
+      {
+        for (int j = 0; j < n_rank; ++j)
+        {
+          output_temp_ptr[i] += *scratch_ptr_batch++;
+        }
+      }
+    }
+
+    // Rescale.
+    const int32_t output_max = std::numeric_limits<int8_t>::max();
+    const int32_t output_min = std::numeric_limits<int8_t>::min();
+    for (int i = 0; i < n_batch * n_unit; ++i)
+    {
+      int32_t x1 = output_temp_data[i];
+      int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+      int32_t x3 = x2 + output_zp;
+      int32_t x4 = std::min(std::max(output_min, x3), output_max);
+      output_data[i] = static_cast<int8_t>(x4);
+    }
+  }
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  {
+    float *new_state_start = activation_state_data;
+    const float *old_state_start = activation_state_data + 1;
+    const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+    while (old_state_start != old_state_end)
+    {
+      *new_state_start++ = *old_state_start++;
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    const float *matrix = weight_feature_data;
+    const float *vector = input_data;
+    float *result = &activation_state_data[memory_size - 1];
+    float *result_in_batch = result;
+    for (int i = 0; i < batch_size; ++i)
+    {
+      const float *matrix_ptr = matrix;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        float dot_prod = 0.0f;
+        const float *vector_in_batch = vector + i * input_size;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += *matrix_ptr++ * *vector_in_batch++;
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+  if (input_data_type == loco::DataType::FLOAT32 &&
+      (weight_feature_data_type == loco::DataType::S8 ||
+       weight_feature_data_type == loco::DataType::U8))
+  {
+    (void)input_shape;
+    (void)weight_time_shape;
+    (void)scratchpad_3;
+    (void)scratchpad_4;
+    (void)scratchpad_5;
+    (void)scratchpad_6;
+
+    throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == loco::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
index a479d407bf8f90e2ef0721751fba88d7b9c7bbf9..907d51de63d8dc43242bf2aa1129d0935577338f 100644 (file)
@@ -39,7 +39,9 @@ macro(add_pal_to_target TGT)
 
     # TODO put it back, I changed my mind.
     # instead add sources with visitors in this library
-    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+    set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+            ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
     add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
     set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
     target_include_directories(luci_interpreter_mcu_pal PRIVATE
index e371503360d0d7a97748ae863dea33c50c0fc00f..997b75a8493cbe2365929b177e32e77a5db29a3f 100644 (file)
@@ -13,6 +13,7 @@ set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
 set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
 set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
 set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
 
 add_subdirectory(core)
 message(STATUS "LUCI INTERPRETER CORE")
@@ -20,6 +21,8 @@ add_subdirectory(kernels)
 message(STATUS "LUCI INTERPRETER KERNELS")
 add_subdirectory(loader)
 message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
 
 message(STATUS "LUCI INTERPTER INITALIZED")
 
index 1b8792a6cd583de27605fd45aab8315f7148764e..8cf272efdaffc520eef121f0879c432daf87176e 100644 (file)
@@ -70,25 +70,30 @@ private:
 
 } // namespace
 
+Interpreter::Interpreter(const luci::Module *module)
+{
+  _runtime_to_ir = std::make_unique<RuntimeToIR>();
+  _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+  _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+  _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+  ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+                      _default_memory_manager.get());
+  loader.load();
+}
+
 Interpreter::Interpreter(const luci::Module *module,
                          luci_interpreter::IMemoryManager *memory_manager)
 {
+  assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
   _runtime_to_ir = std::make_unique<RuntimeToIR>();
   _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
   _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
 
-  if (memory_manager == nullptr)
-  {
-    _default_memory_manager = std::make_unique<SimpleMemoryManager>();
-    _memory_manager = _default_memory_manager.get();
-  }
-  else
-  {
-    _memory_manager = memory_manager;
-  }
-
   ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
-                      _memory_manager);
+                      memory_manager);
   loader.load();
 }
 
index 4430cba11af057f7c7558081c7545e4dda571620..c2471e01c5a83ce2a48400effc4850294b208d91 100644 (file)
@@ -10,7 +10,9 @@ set(SOURCES
     Tensor.cpp)
 
 add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+    set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
 target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
 target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
index ee0390fcc24a2073a90e964d22bf581395ed4c80..958fd4b74c8ca524543a6ca69d2b96ae15ab572a 100644 (file)
@@ -43,6 +43,12 @@ struct ArgMaxParams
   DataType output_type;
 };
 
+struct BatchMatMulParams
+{
+  bool adj_x;
+  bool adj_y;
+};
+
 struct ConcatenationParams
 {
   int axis;
@@ -83,6 +89,13 @@ struct DivParams
 struct FullyConnectedParams
 {
   Activation activation;
+  bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+  int32_t axis;
+  int32_t batch_dims;
 };
 
 struct InstanceNormParams
@@ -119,6 +132,11 @@ struct MulParams
   Activation activation;
 };
 
+struct OneHotParams
+{
+  int32_t axis;
+};
+
 struct PackParams
 {
   int32_t values_count;
@@ -157,6 +175,13 @@ struct SubParams
   Activation activation;
 };
 
+struct SVDFParams
+{
+  bool asymmetric_quantize_inputs;
+  int32_t svdf_rank;
+  Activation activation;
+};
+
 struct SpaceToDepthParams
 {
   int block_size;
diff --git a/compiler/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644 (file)
index 0000000..dd9733f
--- /dev/null
@@ -0,0 +1,15 @@
+set(SOURCES
+    "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+    GraphBuilderRegistry.cpp)
+
+# include specific builders
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644 (file)
index 0000000..a33bca6
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+  auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+  {
+    // redefine NodeBuilder of BUFFER type
+    builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+  }
+
+  return builder;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644 (file)
index 0000000..14e90f2
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// helper struct which describes data loaded to custom_options of CircleReferencingConst node
+struct ConstDataReference
+{
+  const uint8_t *data = nullptr;
+  uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+                                                     GraphBuilderContext *context) const
+{
+  assert(tensor_index >= 0);
+
+  const auto graph = context->graph();
+  const auto reader = context->reader();
+  const auto tensors = reader->tensors();
+  auto const const_tensor = tensors[tensor_index];
+  assert(const_tensor != nullptr);
+  if (const_tensor->is_variable())
+  {
+    // Create CircleVariable for variable
+    return nullptr;
+  }
+
+  auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+  auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+  if (const_dims.empty() && buffer.empty())
+  {
+    // unknown shape tensor and scalar tensor
+    return nullptr;
+  }
+
+  // if tensor_index is used as output to some other operator, this is not a constant
+  auto tensoroutputs = context->tensoroutputs();
+  if (tensoroutputs->find(tensor_index))
+  {
+    // other operator output tensor
+    return nullptr;
+  }
+
+  uint32_t num_elements = 1;
+  for (uint32_t r = 0; r < const_dims.size(); ++r)
+  {
+    num_elements = num_elements * const_dims[r];
+  }
+
+  if (buffer.empty() && num_elements > 0)
+  {
+    // normal empty tensor
+    return nullptr;
+  }
+
+  // create CircleReferencingConst
+  auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+  {
+    custom_node->custom_code("CircleReferencingConst");
+
+    copy_tensor_attributes(const_tensor, custom_node);
+    custom_node->shape_status(luci::ShapeStatus::VALID);
+
+    // custom options stores size of buffer and pointer's value to buffer's data
+    {
+      std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+      {
+        auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+        const_data_ref = {buffer.data(), buffer.size()};
+      }
+      custom_node->custom_options(custom_options);
+    }
+  }
+
+  // Output of CircleCustom node presented with CircleConstNode
+  auto out_node = graph->nodes()->create<CircleCustomOut>();
+  {
+    out_node->index(0);
+    out_node->input(custom_node);
+
+    copy_tensor_attributes(const_tensor, out_node);
+    out_node->shape_status(luci::ShapeStatus::VALID);
+  }
+
+  return out_node;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644 (file)
index 0000000..ed8f951
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+  CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
index 7381c384969b313130862fe64c4e9e14ddf3e01d..d7bf3084f3f99b46d45026bf4642a5fa45b5528a 100644 (file)
@@ -38,8 +38,11 @@ Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddPa
 void Add::configure()
 {
   LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
   if (input1()->element_type() == DataType::S16)
   {
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
     LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
                            output()->zero_point() == 0);
   }
@@ -54,6 +57,12 @@ void Add::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -67,13 +76,8 @@ void Add::execute() const
 
 void Add::evalFloat() const
 {
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
   tflite::ArithmeticParams params{};
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
+  fillArithmeticActivationRange<float>(params, _params.activation);
 
   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
     getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -92,6 +96,28 @@ void Add::evalFloat() const
   }
 }
 
+template <typename T> void Add::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
 void Add::evalQuantized() const
 {
   const auto input1_scale = static_cast<double>(input1()->scale());
index 79518845d26bf4af61063ff7b6fc39e85ab040fe..91d95b6af4f8b355833065fb3fff11a2b8365562 100644 (file)
@@ -39,6 +39,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
   void evalQuantizedS16() const;
 };
index 847b65667402f3433b07382ebff7a6e5c4ce6f8d..b8b1c3089c1e19a7ae710f81828c1b8a28b6fdec 100644 (file)
@@ -166,6 +166,69 @@ TEST_F(AddTest, Float)
   }
 }
 
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<dtype>> test_outputs = {
+    {3, 3, 0, 1, 0, 8, 5,  1, 0, 0, 2, 6, 8,  0, 1, 0, 5, 1,
+     5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+    {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+    {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0,  0, 8, 0, 5, 0, 1,  0,
+     0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+    {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+  std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+  std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    AddParams params{};
+    params.activation = Activation::RELU;
+
+    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    AddParams params{};
+    params.activation = Activation::RELU;
+
+    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+};
+
+TEST_F(AddTest, SInt32)
+{
+  CheckInteger<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(AddTest, SInt64)
+{
+  CheckInteger<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 TEST_F(AddTest, SInt16)
 {
   Shape base_shape = {2, 3, 1, 2};
@@ -248,11 +311,24 @@ TEST_F(AddTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST_F(AddTest, Invalid_Input_Type_NEG)
+TEST_F(AddTest, Invalid_Output_Type_NEG)
 {
   Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
   Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S64);
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  AddParams params{};
+  params.activation = Activation::RELU;
+
+  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AddTest, Invalid_Input_Type_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U64);
 
   AddParams params{};
   params.activation = Activation::RELU;
@@ -263,6 +339,19 @@ TEST_F(AddTest, Invalid_Input_Type_NEG)
   EXPECT_ANY_THROW(kernel.execute());
 }
 
+TEST_F(AddTest, Invalid_Quantization_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+  AddParams params{};
+  params.activation = Activation::NONE;
+
+  Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index 119c69ccf0ad417dd4cd059c9f49e3a56efb4a9f..474f4b32191311c0c6846d3f12e4980da970a14a 100644 (file)
@@ -57,7 +57,7 @@ template <typename T> class ArgMaxTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ArgMaxTest, DataTypes);
+TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
 
 TYPED_TEST(ArgMaxTest, Simple)
 {
index 5545fb4d4de15264d597fad8a4e3dc7b0d161e61..d3bade9e4f94e222359b5771b550e9a299312dc9 100644 (file)
@@ -18,8 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
-#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include "PALAveragePool2d.h"
 
 #include <stdexcept>
 
@@ -29,8 +28,9 @@ namespace luci_interpreter
 namespace kernels
 {
 
-AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
-  : KernelWithParams<Pool2DParams>({input}, {output}, params)
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+                             const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
 {
 }
 
@@ -76,6 +76,10 @@ void AveragePool2D::configure()
     LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
   }
   output()->resize({batches, output_height, output_width, depth});
+
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(output()));
 }
 
 void AveragePool2D::execute() const
@@ -155,9 +159,14 @@ void AveragePool2D::evalSInt8() const
   params.quantized_activation_min = activation_min;
   params.quantized_activation_max = activation_max;
 
-  tflite::reference_integer_ops::AveragePool(
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::AveragePool<int8_t>(
     params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
-    getTensorData<int8_t>(output()));
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
 }
 
 void AveragePool2D::evalSInt16() const
index b98367f3115da143fba803b3c48b26dc9b942d19..2c8fe16e76472db8503cf3b816319ce09482e810 100644 (file)
@@ -28,7 +28,8 @@ namespace kernels
 class AveragePool2D : public KernelWithParams<Pool2DParams>
 {
 public:
-  AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+  AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+                const Pool2DParams &params);
 
   const Tensor *input() const { return _inputs[0]; }
   Tensor *output() const { return _outputs[0]; }
index 7ed421129f99e47c17926bf67eb8db371e018d81..478bfa68ecb86f787a481b3232e13bb6e3220beb 100644 (file)
@@ -46,6 +46,7 @@ TEST_F(AveragePool2DTest, Float)
   Tensor input_tensor =
     makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -55,8 +56,9 @@ TEST_F(AveragePool2DTest, Float)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
   _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
@@ -78,6 +80,7 @@ TEST_F(AveragePool2DTest, Uint8_0)
   Tensor input_tensor = makeInputTensor<DataType::U8>(
     {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+  Tensor scratchpad(DataType::U8, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -87,8 +90,9 @@ TEST_F(AveragePool2DTest, Uint8_0)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
   _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
@@ -107,6 +111,7 @@ TEST_F(AveragePool2DTest, Uint8_1)
   Tensor input_tensor = makeInputTensor<DataType::U8>(
     {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+  Tensor scratchpad(DataType::U8, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -116,9 +121,10 @@ TEST_F(AveragePool2DTest, Uint8_1)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   kernel.configure();
   _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
@@ -141,6 +147,7 @@ TEST_F(AveragePool2DTest, SInt16)
   Tensor input_tensor =
     makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+  Tensor scratchpad(DataType::S16, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -150,8 +157,9 @@ TEST_F(AveragePool2DTest, SInt16)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
   _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
@@ -174,6 +182,7 @@ TEST_F(AveragePool2DTest, SInt8)
   Tensor input_tensor = makeInputTensor<DataType::S8>(
     input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+  Tensor scratchpad(DataType::S8, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -183,8 +192,9 @@ TEST_F(AveragePool2DTest, SInt8)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
   _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
@@ -203,6 +213,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
   Tensor input_tensor =
     makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -212,7 +223,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
@@ -227,6 +238,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG)
   Tensor input_tensor =
     makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -236,7 +248,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
@@ -252,6 +264,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG)
   Tensor input_tensor = makeInputTensor<DataType::U8>(
     {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+  Tensor scratchpad(DataType::U8, Shape({}), {}, "");
 
   Pool2DParams params{};
   params.padding = Padding::VALID;
@@ -261,7 +274,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG)
   params.stride_width = 2;
   params.activation = Activation::RELU6;
 
-  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644 (file)
index 0000000..24ca229
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+  tflite::RuntimeShape swapped_shape(shape);
+  const int32_t dims = shape.DimensionsCount();
+  swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+  swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+  return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+                         Tensor *y_tmp, const BatchMatMulParams &params)
+  : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+void BatchMatMul::configure()
+{
+  auto lhs = x();
+  auto rhs = y();
+  auto adj_x = params().adj_x;
+  auto adj_y = params().adj_y;
+
+  // TODO Support non-float types
+  if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+    throw std::runtime_error("Unsupported type.");
+
+  LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+  auto lhs_rank = lhs->shape().num_dims();
+  auto rhs_rank = rhs->shape().num_dims();
+  LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+  LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+  auto lhs_scratchpad = temp_lhs();
+  auto rhs_scratchpad = temp_rhs();
+  luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+                                              getTensorShape(rhs));
+
+  auto output_rank = std::max(lhs_rank, rhs_rank);
+
+  auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+  auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+  // Ensure any batch dimensions obey broacasting rules.
+  for (int i = 0; i < output_rank - 2; ++i)
+  {
+    const int lhs_dim = extended_lhs_shape.Dims(i);
+    const int rhs_dim = extended_rhs_shape.Dims(i);
+    if (lhs_dim != rhs_dim)
+    {
+      if (lhs_dim != 1)
+      {
+        LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+      }
+    }
+  }
+
+  // Ensure other dimensions work for matrix multiplication.
+  int accum_dim_lhs =
+    adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+  int accum_dim_rhs =
+    adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+  LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+  Shape output_shape(output_rank);
+  // Fill in any broadcast dimensions.
+  for (int i = 0; i < output_rank - 2; ++i)
+  {
+    const int lhs_dim = extended_lhs_shape.Dims(i);
+    const int rhs_dim = extended_rhs_shape.Dims(i);
+    int broadcast_dim = lhs_dim;
+    if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+    {
+      broadcast_dim = rhs_dim;
+    }
+    output_shape.dim(i) = broadcast_dim;
+  }
+  // Fill in the matmul dimensions.
+  int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+  int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+  output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+  output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+  output()->resize(output_shape);
+}
+
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+  tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+  tflite::RuntimeShape shape(getTensorShape(tensor_in));
+  tflite::TransposeParams params;
+  int rank = shape.DimensionsCount();
+  params.perm_count = rank;
+  for (int i = 0; i < rank - 2; ++i)
+  {
+    params.perm[i] = i;
+  }
+  // Transpose the last two dimensions.
+  params.perm[rank - 2] = rank - 1;
+  params.perm[rank - 1] = rank - 2;
+  transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+  transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+  switch (tensor_in->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+                                       transposed_shape, getTensorData<float>(tensor_out));
+      break;
+    default:
+      throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
+  }
+}
+
+void BatchMatMul::execute() const
+{
+  auto lhs = x();
+  auto rhs = y();
+
+  bool adj_x = params().adj_x;
+  bool adj_y = params().adj_y;
+
+  auto orig_lhs_shape = getTensorShape(lhs);
+  auto orig_rhs_shape = getTensorShape(rhs);
+
+  auto rhs_tensor = adj_y ? rhs : temp_rhs();
+  auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+  if (not adj_y)
+  {
+    TransposeRowsColumns(rhs, temp_rhs());
+  }
+  if (adj_x)
+  {
+    TransposeRowsColumns(lhs, temp_lhs());
+  }
+  tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+  tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+  switch (x()->element_type())
+  {
+    case DataType::FLOAT32:
+      luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+                                        getTensorData<float>(lhs_tensor), getTensorShape(output()),
+                                        getTensorData<float>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644 (file)
index 0000000..744f497
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+  BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+              const BatchMatMulParams &params);
+
+  const Tensor *x() const { return _inputs[0]; }
+  const Tensor *y() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  Tensor *temp_lhs() const { return _outputs[1]; }
+  Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644 (file)
index 0000000..edfa3a6
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(BatchMatMulTest, Float)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+  std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+  std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = true;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+  std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+  std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = true;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<float> rhs_data = {7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
+                                 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+                              767., 800., 833., 866.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+  std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::vector<float> rhs_data = {7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
+                                 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(lhs_scratch);
+  _memory_manager->allocate_memory(rhs_scratch);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+                                                         _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+  Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+                                                         _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+                                                         _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+  Tensor lhs_tensor =
+    makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+  Tensor rhs_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+  Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+  BatchMatMulParams params;
+  params.adj_x = false;
+  params.adj_y = false;
+
+  BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index f3a34497448e9f1534b32f82b8cee4e70ccf8807..52647a763d3c251a38e84b0e3b6d303cbe5f2e43 100644 (file)
@@ -58,7 +58,7 @@ template <typename T> class BatchToSpaceNDTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(BatchToSpaceNDTest, DataTypes);
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
 
 TYPED_TEST(BatchToSpaceNDTest, Simple)
 {
index 1b7d0f66a8e288262187baf129cca25fffa536f4..9f4ba0e0baefd8620671def87f258332643db630 100644 (file)
@@ -15,7 +15,9 @@ endmacro(REGISTER_KERNEL)
 include(${KERNEL_REGISTER_FILE})
 
 add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
 
 target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
index 731260522773ebdab234e8dbbcff96fd05dd3d39..4713ad34c95554675583ebf0ac6c4d9e6ae413a0 100644 (file)
@@ -79,7 +79,7 @@ template <typename T> class CastTest : public ::testing::Test
 
 using IntDataTypes =
   ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
-TYPED_TEST_CASE(CastTest, IntDataTypes);
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
 
 TYPED_TEST(CastTest, FloatToInt)
 {
index 7cfdf34b9e4599d337b40599caf82bdb3824e210..46ee5941efea24cdbf4b31435d90955fcc207329 100644 (file)
@@ -69,11 +69,21 @@ void Concatenation::configure()
   Shape output_shape = t0->shape();
   output_shape.dim(axis) = sum_axis;
 
-  // TODO S8 type needs more checking: quantization parameters of all input tensors and the output
-  //  tensor should be the same. Note that there is no such requirement for U8 type.
-  if (t0->element_type() == DataType::S8)
-    throw std::runtime_error("Unsupported type.");
+  // If input tensors are of INT8 type, then the quantization parameters of all
+  // input tensors and the output tensor should be the same
+  for (auto current_tensor : _inputs)
+  {
+    if (current_tensor->element_type() == DataType::S8)
+    {
+      LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+                             output()->quantized_dimension());
 
+      LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+                             current_tensor->scales().size());
+      LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+      LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+    }
+  }
   output()->resize(output_shape);
 }
 
index e4b50611ab25d512c7099c9926d0cb14e070b99f..f893b38fd86f4ed91f8e3cdd2f5918409aaf9e23 100644 (file)
@@ -183,12 +183,12 @@ TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
 {
-  std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6};
-  std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get());
-  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get());
+  std::vector<uint8_t> input1_data{1, 2, 3, 4};
+  std::vector<int8_t> input2_data{5, 6, 7, 8};
+  Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S8);
   ConcatenationParams params{};
 
@@ -199,6 +199,51 @@ TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+  int quantized_dimension = 3;
+  std::vector<float> scales{0.1, 0.2, 0.3};
+  std::vector<int32_t> zero_points{1, -1, 1};
+
+  Tensor input1_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+  std::vector<float> input1_data{1, 2, 3, 4};
+  std::vector<float> input2_data{5, 6, 7, 8};
+  float scale = 0.1;
+  int32_t zero_point_1 = 1;
+  int32_t zero_point_2 = -1;
+
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+  ConcatenationParams params{};
+
+  params.axis = -1;
+  params.activation = luci::FusedActFunc::NONE;
+
+  Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 // TODO: Remove this test when concat w/ fused_activation is supported
 TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
 {
index 5647f4c443548fec3e0678d1fc35d55953fc4ec7..234f95425d9b278a50925b9d466cace6d9dc0d7c 100644 (file)
@@ -30,8 +30,8 @@ namespace kernels
 {
 
 Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-               Tensor *im2col, const Conv2DParams &params)
-  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params)
+               Tensor *scratchpad, const Conv2DParams &params)
+  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
 {
 }
 
@@ -108,27 +108,18 @@ void Conv2D::configure()
 
   output()->resize({batches, output_height, output_width, output_depth});
 
-  // Allocate tensor for Im2Col, if needed.
-  // The checks here should be aligned with the actual implementation.
-  const bool need_dilated_im2col =
-    _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
-  const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
-                                       filter_height != 1 || filter_width != 1;
-  _need_im2col =
-    input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
-  if (_need_im2col)
-  {
-    const int input_depth = input_shape.dim(3);
-    Shape im2col_shape{batches, output_height, output_width,
-                       input_depth * filter_height * filter_width};
-    auto im2col = getOutputTensors()[1];
-    im2col->resize(im2col_shape);
-  }
-  else
-  {
-    auto im2col = getOutputTensors()[1];
-    im2col->set_allocatable(false);
-  }
+  // Allocate tensor for scratchpad, if needed.
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
 
   switch (_params.activation)
   {
@@ -193,16 +184,16 @@ void Conv2D::evalFloat() const
   params.float_activation_min = activation_min;
   params.float_activation_max = activation_max;
 
-  float *im2col_data = nullptr;
-  auto im2col = getOutputTensors()[1];
-  if (_need_im2col)
-  {
-    im2col_data = im2col->data<float>();
-  }
-  luci_interpreter_pal::Conv(
-    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-    getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
-    getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data);
+  auto scratchpad = getOutputTensors()[1];
+  float *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<float>();
+
+  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+                             getTensorShape(filter()), getTensorData<float>(filter()),
+                             getTensorShape(bias()), getTensorData<float>(bias()),
+                             getTensorShape(output()), getTensorData<float>(output()),
+                             getTensorShape(scratchpad), scratchpad_data);
 }
 
 void Conv2D::evalQuantized() const
@@ -236,12 +227,12 @@ void Conv2D::evalQuantized() const
   params.quantized_activation_min = activation_min;
   params.quantized_activation_max = activation_max;
 
-  auto im2col = getOutputTensors()[1];
+  auto scratchpad = getOutputTensors()[1];
   luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
                              getTensorShape(filter()), getTensorData<uint8_t>(filter()),
                              getTensorShape(bias()), getTensorData<int32_t>(bias()),
                              getTensorShape(output()), getTensorData<uint8_t>(output()),
-                             getTensorShape(im2col), getTensorData<uint8_t>(im2col));
+                             getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
 }
 
 void Conv2D::evalQuantizedPerChannel() const
@@ -364,18 +355,16 @@ void Conv2D::evalQuantizedS8PerChannel() const
                  std::back_inserter(multipliers),
                  [](ChannelQuantMultipliers cm) { return cm.multiplier; });
 
-  int8_t *im2col_data = nullptr;
-  auto im2col = getOutputTensors()[1];
-  if (_need_im2col)
-  {
-    im2col_data = im2col->data<int8_t>();
-  }
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
 
   luci_interpreter_pal::ConvPerChannel(
     params, multipliers.data(), shifts.data(), getTensorShape(input()),
     getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
     getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
-    getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data);
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
 }
 
 void Conv2D::evalQuantizedS16() const
index 5f13176386ab63815a03fb4197b7f0f0a84053cd..330bf3a2a69354844758e8cba7fb802d781059e7 100644 (file)
@@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams>
 {
 public:
   Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-         Tensor *im2col, const Conv2DParams &params);
+         Tensor *scratchpad, const Conv2DParams &params);
 
   const Tensor *input() const { return _inputs[0]; }
   const Tensor *filter() const { return _inputs[1]; }
@@ -49,7 +49,6 @@ private:
   void evalQuantizedS16() const;
 
 private:
-  bool _need_im2col = false;
   int32_t _padding_height{};
   int32_t _padding_width{};
 };
index 9b1c09ba9bb05bd7d5db41b328bf9b5bbdf80a1b..88e6e07f1d909e4a45a150976e5850a1d601c57b 100644 (file)
@@ -32,7 +32,7 @@ template <typename T> class DepthToSpaceTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
 
 TYPED_TEST(DepthToSpaceTest, SimpleCase)
 {
index f2dbf6c68c46d82f38ef31afb06e3821b2fa310f..c554c309d14758d4a11f8951b508042f2b8fc301 100644 (file)
@@ -18,9 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include "PALDepthwiseConv2d.h"
 
 #include <stdexcept>
 
@@ -30,8 +28,9 @@ namespace kernels
 {
 
 DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
-                                 Tensor *output, const DepthwiseConv2DParams &params)
-  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+                                 Tensor *output, Tensor *scratchpad,
+                                 const DepthwiseConv2DParams &params)
+  : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
 {
 }
 
@@ -109,6 +108,16 @@ void DepthwiseConv2D::configure()
                                   filter_width, output_width);
 
   output()->resize({batches, output_height, output_width, channels_out});
+
+  tflite::DepthwiseParams params{};
+
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+
+  auto scratchpad = getOutputTensors()[1];
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
 }
 
 void DepthwiseConv2D::execute() const
@@ -337,11 +346,16 @@ void DepthwiseConv2D::evalQuantizedS8PerChannel() const
                  std::back_inserter(multipliers),
                  [](ChannelQuantMultipliers cm) { return cm.multiplier; });
 
-  tflite::reference_integer_ops::DepthwiseConvPerChannel(
+  auto scratchpad = getOutputTensors()[1];
+  int8_t *scratchpad_data = nullptr;
+  if (scratchpad->is_allocatable())
+    scratchpad_data = scratchpad->data<int8_t>();
+
+  luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
     params, multipliers.data(), shifts.data(), getTensorShape(input()),
     getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
     getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
-    getTensorData<int8_t>(output()));
+    getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
 }
 
 void DepthwiseConv2D::evalQuantizedS16() const
index 6cffd6583a41ad2ea9805eb54c1e54d4762c394e..3d1faf6c1cca049103dff39c3459329c6bc6bec2 100644 (file)
@@ -29,7 +29,7 @@ class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
 {
 public:
   DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-                  const DepthwiseConv2DParams &params);
+                  Tensor *scratchpad, const DepthwiseConv2DParams &params);
 
   const Tensor *input() const { return _inputs[0]; }
   const Tensor *filter() const { return _inputs[1]; }
index 74975899a3db6a3d6fc9613e01926f626becef7e..6b4673f3e2023d9a320ba44c39e8578d5deecd4a 100644 (file)
@@ -59,6 +59,7 @@ TEST_F(DepthwiseConv2DTest, Float)
     makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
   Tensor bias_tensor =
     makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthwiseConv2DParams params{};
@@ -70,8 +71,10 @@ TEST_F(DepthwiseConv2DTest, Float)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   kernel.configure();
+  _memory_manager->allocate_memory(scratchpad);
   _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
@@ -111,6 +114,7 @@ TEST_F(DepthwiseConv2DTest, Uint8)
     {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -121,9 +125,11 @@ TEST_F(DepthwiseConv2DTest, Uint8)
   params.dilation_width_factor = 1;
   params.activation = Activation::NONE;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   kernel.configure();
   _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -166,6 +172,7 @@ TEST_F(DepthwiseConv2DTest, SInt16)
   Tensor bias_tensor =
     makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+  Tensor scratchpad(DataType::S64, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -176,9 +183,11 @@ TEST_F(DepthwiseConv2DTest, SInt16)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   kernel.configure();
   _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -224,6 +233,7 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
   Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
                                                       _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+  Tensor scratchpad(DataType::S16, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -234,9 +244,11 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   kernel.configure();
   _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -299,6 +311,7 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
                                                       _memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+  Tensor scratchpad(DataType::U8, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -309,9 +322,11 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
   params.dilation_width_factor = 1;
   params.activation = Activation::NONE;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   kernel.configure();
   _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -375,6 +390,7 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
                                                       _memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+  Tensor scratchpad(DataType::S8, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -385,9 +401,11 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
   params.dilation_width_factor = 1;
   params.activation = Activation::NONE;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   kernel.configure();
   _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -419,6 +437,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
     makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
   Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -429,7 +448,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
@@ -458,6 +478,7 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
   Tensor bias_tensor =
     makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
+  Tensor scratchpad(DataType::U8, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -468,7 +489,8 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
@@ -497,6 +519,7 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
   Tensor bias_tensor =
     makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -507,7 +530,8 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
@@ -536,6 +560,7 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
   Tensor bias_tensor =
     makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -546,7 +571,8 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
@@ -575,6 +601,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
   Tensor bias_tensor =
     makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
 
   DepthwiseConv2DParams params{};
   params.padding = Padding::VALID;
@@ -585,7 +612,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+                         params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644 (file)
index 0000000..96399e5
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
+                         input()->element_type() == loco::DataType::U8 ||
+                         input()->element_type() == loco::DataType::S16);
+
+  LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+  if (input()->element_type() == loco::DataType::S16)
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+  LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+
+  output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+  tflite::DequantizationParams op_params;
+  op_params.zero_point = input()->zero_point();
+  op_params.scale = input()->scale();
+
+  switch (input()->element_type())
+  {
+    case loco::DataType::U8:
+    {
+      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+                                       getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    }
+    case loco::DataType::S8:
+    {
+      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+                                       getTensorData<int8_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    }
+    case loco::DataType::S16:
+    {
+      luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+                                       getTensorData<int16_t>(input()), getTensorShape(output()),
+                                       getTensorData<float>(output()));
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644 (file)
index 0000000..5565df0
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+  Dequantize(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644 (file)
index 0000000..0cab633
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+  std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+  std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+  Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+  _memory_manager->allocate_memory(input_tensor);
+  input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Dequantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+  std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+  std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+  Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+  _memory_manager->allocate_memory(input_tensor);
+  input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Dequantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+  std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+  std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+  Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+  _memory_manager->allocate_memory(input_tensor);
+  input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Dequantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+  std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Dequantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+  std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+  Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+  _memory_manager->allocate_memory(input_tensor);
+  input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+  Dequantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Dequantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index 0e52ba1f0aaa1a600edebb2486bd19cbf722637a..dd153227850688802506d7ae06992f458f78ae04 100644 (file)
@@ -46,6 +46,12 @@ void Div::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -56,13 +62,9 @@ void Div::execute() const
 
 void Div::evalFloat() const
 {
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
   tflite::ArithmeticParams params{};
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
+  fillArithmeticActivationRange<float>(params, _params.activation);
+
   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
     getTensorShape(input1()), getTensorShape(input2()), &params);
 
@@ -80,6 +82,28 @@ void Div::evalFloat() const
   }
 }
 
+template <typename T> void Div::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastDivSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
 void Div::evalQuantized() const
 {
   const auto input1_scale = static_cast<double>(input1()->scale());
index 6040cdd02ac12b393b8a7d01df713a2839146eaa..c1bf3e10bd5d9903ba92475960807ab6d9d95316 100644 (file)
@@ -39,6 +39,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 };
 
index 021d68d068230db38054512823e241ffcf63fde7..85cd8b90aff80e81ab744bcde5bb7d801032cd95 100644 (file)
@@ -134,6 +134,56 @@ TEST_F(DivTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
 }
 
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+  std::vector<std::vector<dtype>> test_outputs = {{5,  6,  2, 0,  10, 3, //
+                                                   10, 0,  4, 5,  20, 0, //
+                                                   0,  0,  0, 2,  0,  0, //
+                                                   2,  0,  1, 10, 5,  0, //
+                                                   2,  3,  1, 0,  5,  1, //
+                                                   18, 20, 7, 0,  37, 10},
+                                                  {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+                                                  {5, 7, 4, 6, 2, 3, 10, 0,  8,  0,  4, 0,
+                                                   0, 0, 0, 0, 0, 0, 0,  10, 5,  0,  1, 0,
+                                                   0, 0, 5, 9, 1, 1, 0,  0,  37, 50, 7, 10},
+                                                  {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+  std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+  std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    DivParams params{};
+    params.activation = Activation::RELU;
+
+    Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+}
+
+TEST_F(DivTest, SInt64)
+{
+  checkInteger<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(DivTest, SInt32)
+{
+  checkInteger<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
 TEST_F(DivTest, Input_Output_Type_NEG)
 {
   Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
@@ -149,9 +199,9 @@ TEST_F(DivTest, Input_Output_Type_NEG)
 
 TEST_F(DivTest, Invalid_Input_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
-  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S64);
+  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U64);
 
   DivParams params{};
   params.activation = Activation::RELU;
@@ -162,6 +212,19 @@ TEST_F(DivTest, Invalid_Input_Type_NEG)
   EXPECT_ANY_THROW(kernel.execute());
 }
 
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  DivParams params{};
+  params.activation = Activation::RELU;
+
+  Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index f58de1250e10c66e7dd6ebfc8f7e12c5b122b1f8..a57e127b7f8c15e0dcdd2089ab63f5c0414459be 100644 (file)
@@ -49,6 +49,12 @@ void Equal::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -79,6 +85,29 @@ void Equal::evalFloat() const
   }
 }
 
+template <typename T> void Equal::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                         getTensorShape(y()), y_data,
+                                                         getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                          getTensorShape(y()), y_data, getTensorShape(output()),
+                                          output_data);
+  }
+}
+
 void Equal::evalQuantized() const
 {
   const auto x_data = getTensorData<uint8_t>(x());
index 11f025eacd7118069369cdc21f1cdc77926ad6c4..c9be32cc096828f99845ab4d8e58762d96928fef 100644 (file)
@@ -38,6 +38,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 
 private:
index 46a0f97d8eb5bf8a95cab70ebe0d4ea422d0b9c0..5870e54602d1e0324b1091a96308ad34dc45718d 100644 (file)
@@ -99,6 +99,82 @@ TEST_F(EqualTest, FloatBroardcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
 }
 
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value, -2, max_value};
+
+  std::vector<bool> ref_output_data{true, false, true};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -2, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value, -2, max_value, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  false, false, // Row 1
+    false, false, true,  // Row 2
+    false, true,  false, // Row 3
+    true,  true,  true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(EqualTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(EqualTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
@@ -195,6 +271,36 @@ TEST_F(EqualTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644 (file)
index 0000000..ba35c99
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+  : Kernel({input, axis}, {output})
+{
+}
+
+void ExpandDims::configure()
+{
+  int32_t axis_value;
+
+  switch (axis()->element_type())
+  {
+    case loco::DataType::S32:
+      axis_value = *getTensorData<int32_t>(axis());
+      break;
+    case loco::DataType::S64:
+      axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+
+  const auto input_shape = input()->shape();
+
+  if (axis_value < 0)
+  {
+    axis_value += input_shape.num_dims() + 1;
+  }
+
+  LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
+
+  Shape output_shape(input_shape.num_dims() + 1);
+  for (int32_t i = 0; i < output_shape.num_dims(); ++i)
+  {
+    if (i < axis_value)
+    {
+      output_shape.dim(i) = input_shape.dim(i);
+    }
+    else if (i == axis_value)
+    {
+      output_shape.dim(i) = 1;
+    }
+    else
+    {
+      LUCI_INTERPRETER_CHECK(i >= 1);
+      output_shape.dim(i) = input_shape.dim(i - 1);
+    }
+  }
+
+  output()->resize(output_shape);
+}
+
+void ExpandDims::execute() const
+{
+  // Just copy input to output
+  const auto *input_data = input()->data<void>();
+  auto *output_data = output()->data<void>();
+
+  const size_t element_size = getDataTypeSize(input()->element_type());
+  const int32_t num_elements = input()->shape().num_elements();
+  std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644 (file)
index 0000000..e510b11
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ExpandDims : public Kernel
+{
+public:
+  ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *axis() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644 (file)
index 0000000..df9eacc
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+TEST_F(ExpandDimsTest, NegAxis)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {-1};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<float> axis_value = {1.0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+  std::vector<int32_t> input_data{-1, 1, -2, 2};
+  std::initializer_list<int32_t> input_shape = {2, 2};
+
+  std::initializer_list<int32_t> axis_value = {3};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index cfe8f8bf26aef969c6cc16a47157369b57496c07..bd2bb2f35989c6ebce2ba2d5974496208b29a2c4 100644 (file)
@@ -18,8 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include "PALFullyConnected.h"
 
 #include <stdexcept>
 
@@ -74,7 +73,18 @@ void FullyConnected::configure()
   if (bias())
     LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0));
 
-  output()->resize({batch_size, num_units});
+  if (params().keep_num_dims == false)
+  {
+    output()->resize({batch_size, num_units});
+  }
+  else
+  {
+    luci_interpreter::Shape output_shape(input_shape.num_dims());
+    for (int i = 0; i < input_shape.num_dims(); ++i)
+      output_shape.dim(i) = input_shape.dim(i);
+    output_shape.dim(input_shape.num_dims() - 1) = num_units;
+    output()->resize(output_shape);
+  }
 }
 
 void FullyConnected::execute() const
@@ -172,7 +182,7 @@ void FullyConnected::evalQuantizedS8() const
   op_params.quantized_activation_max = output_activation_max;
   op_params.lhs_cacheable = false;
   op_params.rhs_cacheable = false;
-  tflite::reference_integer_ops::FullyConnected(
+  luci_interpreter_pal::FullyConnected<int8_t>(
     op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
     getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
     getTensorShape(output()), getTensorData<int8_t>(output()));
index b0eda014528575a592b1efe6487b88f71a17ebc6..4474cc4fb9ff0d085c779f1f97c61e0101f6b55e 100644 (file)
@@ -133,7 +133,7 @@ template <typename T> class FullyConnectedTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_CASE(FullyConnectedTest, DataTypes);
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
 
 TYPED_TEST(FullyConnectedTest, Simple)
 {
diff --git a/compiler/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644 (file)
index 0000000..f125666
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+               const GatherParams &gparams)
+  : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+void Gather::configure()
+{
+  if (params()->element_type() == DataType::FLOAT32)
+  {
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  // refer tensorflow/lite/kernels/gather.cc
+
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  int axis = _params.axis;
+  if (axis < 0)
+  {
+    axis += params_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  int batch_dims = _params.batch_dims;
+  // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // Negative batch_dims is added with rank of positions.
+  if (batch_dims < 0)
+  {
+    batch_dims += indices_shape.num_dims();
+  }
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+  Shape output_shape(num_dimensions);
+  int output_index = 0;
+  for (int i = 0; i < axis; ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = indices_shape.dim(i);
+  }
+  for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+  {
+    output_shape.dim(output_index++) = params_shape.dim(i);
+  }
+  output()->resize(output_shape);
+}
+
+void Gather::execute() const
+{
+  switch (params()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  const auto params_data = getTensorData<float>(params());
+  auto output_data = getTensorData<float>(output());
+
+  tflite::GatherParams tparams;
+  tparams.axis = _params.axis;
+  tparams.batch_dims = _params.batch_dims;
+
+  if (indices()->element_type() == DataType::S32)
+  {
+    const auto indices_data = getTensorData<int32_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+  else
+  {
+    const auto indices_data = getTensorData<int64_t>(indices());
+
+    luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+                                                 getTensorShape(indices()), indices_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Gather.h b/compiler/luci-interpreter/src/kernels/Gather.h
new file mode 100644 (file)
index 0000000..cc02d64
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+  Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+  const Tensor *params() const { return _inputs[0]; }
+  const Tensor *indices() const { return _inputs[1]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644 (file)
index 0000000..4b3dda7
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GatherTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GatherTest, Simple)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+TEST_F(GatherTest, Simple_Batch)
+{
+  Shape params_shape = {3, 5};
+  Shape indices_shape = {3, 2};
+  std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+  std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+  std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+  Tensor indices_tensor =
+    makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 1;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+TEST_F(GatherTest, Simple_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Axis_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 100;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Batch_NEG)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+  std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams;
+
+  gparams.axis = 0;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index f0dd2db368c5e5d9adea5779b13abd12bbef520b..5ccae3c387a0cd08b6ed709205c470bf07086916 100644 (file)
@@ -49,6 +49,12 @@ void Greater::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -79,6 +85,29 @@ void Greater::evalFloat() const
   }
 }
 
+template <typename T> void Greater::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                                           getTensorShape(y()), y_data,
+                                                           getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+                                            getTensorShape(y()), y_data, getTensorShape(output()),
+                                            output_data);
+  }
+}
+
 void Greater::evalQuantized() const
 {
   const auto x_data = getTensorData<uint8_t>(x());
index 877c139c911373333f1c757e7c99eec86ea33203..065f76d7b8f48bc2bd2a28cb7e33ed428a8012e4 100644 (file)
@@ -38,6 +38,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 
 private:
index ba3925f1728174513c72bcb5d2357e384346f026..a48080124ef9142e1c24f66f9260b154d4c40380 100644 (file)
@@ -97,6 +97,82 @@ TEST_F(GreaterTest, FloatBroardcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
 }
 
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -4, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true,  false, // Row 1
+    true,  true,  true,  // Row 2
+    true,  false, false, // Row 3
+    false, false, true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(GreaterTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(GreaterTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index e7c1b4afe1fb059d8a13a798d36c1bb7c3dff50f..27e42c971415578e1323bc3a7dc295be6d6cc75e 100644 (file)
@@ -52,6 +52,12 @@ void GreaterEqual::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -82,6 +88,29 @@ void GreaterEqual::evalFloat() const
   }
 }
 
+template <typename T> void GreaterEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+      op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+      output_data);
+  }
+  else
+  {
+    tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                 getTensorShape(y()), y_data,
+                                                 getTensorShape(output()), output_data);
+  }
+}
+
 void GreaterEqual::evalQuantized() const
 {
   const auto x_data = getTensorData<uint8_t>(x());
index 4a0f48748405e4f93040614d862dc07b4d3f3e65..e333c30a6be51bd47d1959fa7ff815e02ab8e5a7 100644 (file)
@@ -38,6 +38,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 
 private:
index a9d1723011db11a4f51acef39c86a744f7d3deb5..35bf88eab332cf2dedb10e608d8346a57bfa5288 100644 (file)
@@ -96,6 +96,81 @@ TEST_F(GreaterEqualTest, FloatBroardcast)
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
 }
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, true};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,             // Row 1
+    4,         5,  max_value,     // Row 2
+    -1,        -4, -3,            // Row 3
+    min_value, -2, max_value - 1, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true,  false, // Row 1
+    true,  true,  true,  // Row 2
+    true,  false, false, // Row 3
+    false, true,  true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterEqualTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(GreaterEqualTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
 
 // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
 const float F_MIN = -128.0 / 128.0;
@@ -223,6 +298,36 @@ TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index 1e565e3580a335d24fee3bfab2d81f24db8be81f..6f960e8b45d6e3b009da1fecc0e5ff38ac83591e 100644 (file)
@@ -81,7 +81,7 @@ template <typename T> class L2NormalizeTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(L2NormalizeTest, DataTypes);
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
 
 TYPED_TEST(L2NormalizeTest, Simple)
 {
index 289742a50553b5177cf37aa138f8106e65d92138..7245456cb6597fc60b80737903feae453c0f72db 100644 (file)
@@ -206,7 +206,8 @@ TEST_F(L2Pool2DTest, FloatPaddingSameStride)
   kernel.execute();
 
   std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
-  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
   // TODO make a Shape checking of output_tensor.
 }
 
index 6ec8a348a5aed43496395d9366507e4615960bea..0f6263b57539b98e23895ce6f1065ffd33f7ccfc 100644 (file)
@@ -83,7 +83,7 @@ template <typename T> class LeakReluTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(LeakReluTest, DataTypes);
+TYPED_TEST_SUITE(LeakReluTest, DataTypes);
 
 TYPED_TEST(LeakReluTest, Simple)
 {
index 0414449260b20150060724d7bba3b172cbc3e976..8d26ff29785b30520c234bb1058c444d8fc2deb7 100644 (file)
@@ -49,6 +49,12 @@ void Less::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -79,6 +85,29 @@ void Less::evalFloat() const
   }
 }
 
+template <typename T> void Less::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+                                                        getTensorShape(y()), y_data,
+                                                        getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+                                         getTensorShape(y()), y_data, getTensorShape(output()),
+                                         output_data);
+  }
+}
+
 void Less::evalQuantized() const
 {
   const auto x_data = getTensorData<uint8_t>(x());
index 293740e72fd6f7ac5afd8ec507bf83cac441efbe..e27bb689c3aafba9f64e4ac58ab72e9fe97714b8 100644 (file)
@@ -38,6 +38,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 
 private:
index e9d09b288d2c2142681e5d73213a7bf69ca95339..8c5963363dcb5ce26f2b62aa73943a79dc7a7bba 100644 (file)
@@ -97,6 +97,82 @@ TEST_F(LessTest, FloatBroardcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
 }
 
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{true, false, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Less kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -4, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  false, true,  // Row 1
+    false, false, false, // Row 2
+    false, true,  true,  // Row 3
+    true,  false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Less kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(LessTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(LessTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(LessTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Less kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Less kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Less kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index 5f4c7f7aacd3a9be849ac597306eb91de26ca96f..b474bc47a39e81076110c96603468cf6034f61d5 100644 (file)
@@ -49,6 +49,12 @@ void LessEqual::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -79,6 +85,29 @@ void LessEqual::evalFloat() const
   }
 }
 
+template <typename T> void LessEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                             getTensorShape(y()), y_data,
+                                                             getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                              getTensorShape(y()), y_data, getTensorShape(output()),
+                                              output_data);
+  }
+}
+
 void LessEqual::evalQuantized() const
 {
   const auto x_data = getTensorData<uint8_t>(x());
index b6da1a2a850cac32ff74c76209f6d7c919bac40f..f82ea90d4f2ba2067d2854433762fcecfea4c521 100644 (file)
@@ -38,6 +38,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 
 private:
index 0558003dd90081a49ab7a072b389e63f93fc1632..b2e2fa7a1dc0a8536d3e4a999fcc7f7cca079842 100644 (file)
@@ -97,6 +97,82 @@ TEST_F(LessEqualTest, FloatBroardcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
 }
 
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+  std::vector<bool> ref_output_data{true, false, true};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -4, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value + 1, -2, max_value - 1, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    true,  false, true,  // Row 1
+    false, false, false, // Row 2
+    false, true,  true,  // Row 3
+    true,  true,  false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessEqualTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(LessEqualTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(LessEqualTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(LessEqualTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index 70227563fcdf75b20b751251a61897f70f310e10..5a1ea669c1f504604caa173a06259cf360b83513 100644 (file)
@@ -76,7 +76,7 @@ template <typename T> class LogisticTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(LogisticTest, DataTypes);
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
 
 TYPED_TEST(LogisticTest, Simple)
 {
index 89049c96c55396dc40c6993b47c7b7f3995384d5..2fbeefce4cdaa5c69001acba8cd0e5738e357e48 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,8 +19,6 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
 namespace luci_interpreter
 {
 namespace kernels
@@ -59,44 +58,25 @@ void MirrorPad::configure()
   output()->resize(output_shape);
 }
 
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+                          Tensor &output);
+
 void MirrorPad::execute() const
 {
-  const int num_dims = input()->shape().num_dims();
-
-  tflite::PadParams params{};
-  params.left_padding_count = num_dims;
-  params.right_padding_count = num_dims;
-
-  const auto *paddings_data = getTensorData<int32_t>(paddings());
-  for (int i = num_dims - 1; i >= 0; --i)
-  {
-    params.left_padding[i] = paddings_data[i * 2];
-    params.right_padding[i] = paddings_data[i * 2 + 1];
-  }
-
   switch (input()->element_type())
   {
     case DataType::FLOAT32:
     {
-      const float pad_value = 0;
-
-      // NOTE: this implementation only obtains min-max values for quantization
-      // TODO: calculate proper inference values
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<float>(output()));
+      MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
       break;
     }
     case DataType::U8:
     {
-      // NOTE: this implementation only obtains min-max values for quantization
-      // TODO: calculate proper inference values
       assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
       assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
-      const auto pad_value = static_cast<uint8_t>(output()->zero_point());
-      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                                 &pad_value, getTensorShape(output()),
-                                 getTensorData<uint8_t>(output()));
+
+      MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
       break;
     }
     default:
@@ -104,5 +84,87 @@ void MirrorPad::execute() const
   }
 }
 
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+                          Tensor &output)
+{
+  auto const input_dims = input.shape().num_dims();
+  auto const input_data = input.data<T>();
+  auto const paddings_data = paddings.data<int32_t>();
+  auto const output_data = output.data<T>();
+
+  auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+  auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+  auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+  auto const input_d = input.shape().dim(input_dims - 1);
+
+  auto const input_h_offset = input_d * input_w;
+  auto const input_b_offset = input_h_offset * input_h;
+
+  auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+  auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+  auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+  auto const output_d = output.shape().dim(input_dims - 1);
+
+  auto const left_b_pad = paddings_data[2 * (input_dims - 4)];
+  auto const left_h_pad = paddings_data[2 * (input_dims - 3)];
+  auto const left_w_pad = paddings_data[2 * (input_dims - 2)];
+  auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+  auto const right_b_pad = paddings_data[2 * (input_dims - 4) + 1];
+  auto const right_h_pad = paddings_data[2 * (input_dims - 3) + 1];
+  auto const right_w_pad = paddings_data[2 * (input_dims - 2) + 1];
+  auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+  const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+  const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+                                                                      auto b) {
+    return d + w * input_d + h * input_h_offset + b * input_b_offset;
+  };
+
+  const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+    bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+    return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+  };
+
+  const T *in_ptr = input_data;
+  T *out_ptr = output_data;
+
+  for (int32_t b = 0; b < output_b; ++b)
+  {
+    for (int32_t h = 0; h < output_h; ++h)
+    {
+      for (int32_t w = 0; w < output_w; ++w)
+      {
+        for (int32_t d = 0; d < output_d; ++d)
+        {
+          if (b < left_b_pad || b >= output_b - right_b_pad || //
+              h < left_h_pad || h >= output_h - right_h_pad || //
+              w < left_w_pad || w >= output_w - right_w_pad || //
+              d < left_d_pad || d >= output_d - right_d_pad)
+          {
+            if (mode == MirrorPadMode::REFLECT)
+            {
+              *out_ptr++ = input_data[offset_index(
+                positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+                positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+            }
+            else
+            {
+              *out_ptr++ = input_data[offset_index(
+                symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+                symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+            }
+          }
+          else
+          {
+            *out_ptr++ = *in_ptr++;
+          }
+        }
+      }
+    }
+  }
+}
+
 } // namespace kernels
 } // namespace luci_interpreter
index de9da5051fede7a6ed3721e4070d986e12d5fcdb..740d8cb2272f8a17e783216495717b03faa19653 100644 (file)
  * limitations under the License.
  */
 
-// TODO: Add tests for MirrorPad
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+  {
+    MirrorPadParams params{};
+    params.mode = mode;
+
+    MirrorPad kernel(&input, &padding, &output, params);
+    kernel.configure();
+    _memory_manager->allocate_memory(output);
+    kernel.execute();
+  }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+  Shape input_shape = {1, 2, 2, 1};
+  Shape padding_shape = {4, 2};
+
+  std::vector<float> input_data{1.0f, 2.0f,  //
+                                3.0f, 4.0f}; //
+  std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+  std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f,  //
+                                     4.0f, 3.0f, 4.0f, 3.0f, 4.0f,  //
+                                     2.0f, 1.0f, 2.0f, 1.0f, 2.0f,  //
+                                     4.0f, 3.0f, 4.0f, 3.0f, 4.0f,  //
+                                     2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+  std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+  Shape input_shape = {1, 2, 2, 1};
+  Shape padding_shape = {4, 2};
+
+  std::vector<float> input_data{1.0f, 2.0f,  //
+                                3.0f, 4.0f}; //
+  std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+  std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0,  //
+                                     1.0, 1.0, 2.0, 2.0, 1.0,  //
+                                     1.0, 1.0, 2.0, 2.0, 1.0,  //
+                                     3.0, 3.0, 4.0, 4.0, 3.0,  //
+                                     3.0, 3.0, 4.0, 4.0, 3.0}; //
+  std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+  Shape input_shape = {3, 1};
+  Shape padding_shape = {2, 2};
+
+  std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+  std::vector<int> padding_data{1, 2, 0, 0};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+  std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+  std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+  Shape input_shape = {1, 2, 3, 1};
+  Shape padding_shape = {4, 2};
+
+  float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+  std::vector<float> input_data{1.0f, 2.0f, 3.0f,  //
+                                4.0f, 5.0f, 6.0f}; //
+  std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+  std::vector<float> ref_output_data{
+    3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+    6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+    3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+    6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+    3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+  };
+  std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, quant_tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+  Shape input_shape = {1, 2, 3, 1};
+  Shape padding_shape = {4, 2};
+
+  float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+  std::vector<float> input_data{1.0f, 2.0f, 3.0f,  //
+                                4.0f, 5.0f, 6.0f}; //
+  std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+  std::vector<float> ref_output_data{
+    4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+    1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+    1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+    4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+    4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+  };
+  std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, quant_tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index bc855de0f71eee4ea83ef1ba8d02ce1f9890f313..531fb4fa1ed55460a63e698c996c662f04a1448f 100644 (file)
@@ -42,6 +42,8 @@ void Mul::configure()
   LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
   if (input1()->element_type() == DataType::S16)
   {
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
     LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
                            output()->zero_point() == 0);
   }
@@ -56,6 +58,12 @@ void Mul::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::S16:
       evalQuantizedS16();
       break;
@@ -66,13 +74,8 @@ void Mul::execute() const
 
 void Mul::evalFloat() const
 {
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
   tflite::ArithmeticParams params{};
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
+  fillArithmeticActivationRange<float>(params, _params.activation);
 
   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
     getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -91,6 +94,28 @@ void Mul::evalFloat() const
   }
 }
 
+template <typename T> void Mul::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    luci_interpreter_pal::BroadcastMul4DSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                              getTensorShape(input2()), getTensorData<T>(input2()),
+                              getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
 void Mul::evalQuantizedS16() const
 {
   const auto input1_scale = static_cast<double>(input1()->scale());
index 2ccf60f3a0c872a1ab01c0e3fa8932085422b777..c0cf817dfebe41beeac0359effa695f98f232c56 100644 (file)
@@ -42,6 +42,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantizedS16() const;
 };
 
index 471f6ac8607fb8bc57d3e9803bceab0301fac2d4..fc0e60614135e807db3c830d2e3ee7525377451a 100644 (file)
@@ -93,6 +93,78 @@ TEST_F(MulTest, Float)
   }
 }
 
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+  dtype max_value = std::numeric_limits<dtype>::max();
+  dtype res_max = max_value - max_value % 10;
+
+  std::vector<std::vector<dtype>> test_outputs = {
+    {8,  0, 20,  0, 4,  30,  //
+     16, 0, 40,  3, 8,  0,   //
+     0,  0, 0,   6, 0,  0,   //
+     4,  0, 10,  9, 2,  0,   //
+     40, 0, 100, 0, 20, 150, //
+     28, 0, 70,  0, 14, res_max},
+    {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+    {8,  12,     0, 0, 20, 30, 16, 0, 0, 0,  40, 0,   0,   0, 0, 0,  0,
+     0,  0,      9, 2, 0,  10, 0,  0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+     70, res_max},
+    {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+  std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+  std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+}
+
+TEST_F(MulTest, SInt64)
+{
+  checkInteger<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(MulTest, SInt32)
+{
+  checkInteger<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
 TEST_F(MulTest, SInt16)
 {
   Shape base_shape = {2, 3, 1, 2};
@@ -161,6 +233,60 @@ TEST_F(MulTest, SInt16)
   }
 }
 
+TEST_F(MulTest, Input_Output_Type_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Output_Type_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Input_Type_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+  MulParams params{};
+  params.activation = Activation::RELU;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(MulTest, Invalid_Quantization_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+  MulParams params{};
+  params.activation = Activation::NONE;
+
+  Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index 99d5e0fa09198f70229dfeb6e129df7ad6831b86..54e5eee346965ce92448799767f7ef81035a4489 100644 (file)
@@ -49,6 +49,12 @@ void NotEqual::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -79,6 +85,29 @@ void NotEqual::evalFloat() const
   }
 }
 
+template <typename T> void NotEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                            getTensorShape(y()), y_data,
+                                                            getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                             getTensorShape(y()), y_data, getTensorShape(output()),
+                                             output_data);
+  }
+}
+
 void NotEqual::evalQuantized() const
 {
   const auto x_data = getTensorData<uint8_t>(x());
index 247874df7125680f0515f1e819f7c9ac28c88e64..d2aafe8933ecf186384f1de5bbee53d7346c5d02 100644 (file)
@@ -38,6 +38,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 
 private:
index 763f868932658316b481536596ba81f7ef35e65d..45bf4022af272f283c0ffda2798c62af2f94f326 100644 (file)
@@ -99,6 +99,82 @@ TEST_F(NotEqualTest, FloatBroardcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
 }
 
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{min_value, 2, max_value};
+
+  std::vector<dtype> y_data{min_value, -2, max_value};
+
+  std::vector<bool> ref_output_data{false, true, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  dtype min_value = std::numeric_limits<dtype>::min();
+  dtype max_value = std::numeric_limits<dtype>::max();
+  std::vector<dtype> x_data{
+    min_value, 2,  3,         // Row 1
+    4,         5,  max_value, // Row 2
+    -1,        -2, -3,        // Row 3
+    min_value, -2, max_value, // Row 4
+  };
+
+  std::vector<dtype> y_data{
+    min_value, -2, max_value, // Row 1
+  };
+
+  std::vector<bool> ref_output_data{
+    false, true,  true,  // Row 1
+    true,  true,  false, // Row 2
+    true,  false, true,  // Row 3
+    false, false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(NotEqualTest, Int32)
+{
+  checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+TEST_F(NotEqualTest, Int64)
+{
+  checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+  checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
@@ -195,6 +271,36 @@ TEST_F(NotEqualTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
+TEST_F(NotEqualTest, Float_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int32_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int64_Broadcast_NEG)
+{
+  Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644 (file)
index 0000000..4d3e5f2
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+                       Tensor *output_tensor)
+{
+  // define input shape and correct axis
+  auto const &input_shape = indices_tensor->shape();
+  axis = axis == -1 ? input_shape.num_dims() : axis;
+
+  // TODO support other integer input types
+  auto const *indices = getTensorData<int32_t>(indices_tensor);
+  auto const on_value = getTensorData<T>(on_value_tensor)[0];
+  auto const off_value = getTensorData<T>(off_value_tensor)[0];
+  auto *output = getTensorData<T>(output_tensor);
+
+  // prefix_dim_size == # of elements before the axis
+  // depth == # of elements per axis
+  // suffix_dim_size == # of elements after the axis
+  auto prefix_dim_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+  {
+    prefix_dim_size *= input_shape.dim(i);
+  }
+  assert(prefix_dim_size > 0);
+  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+  // View the indices as a matrix of size:
+  //     prefix_dim_size x suffix_dim_size
+  // View the output as a matrix of size:
+  //     prefix_dim_size x depth x suffix_dim_size
+  // Then the output is:
+  //     output(i, j, k) == (indices(i, k) == j) ? on : off
+  for (int32_t i = 0; i < prefix_dim_size; ++i)
+    for (int32_t j = 0; j < depth; ++j)
+      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+               const Tensor *off_value, Tensor *output, const OneHotParams &params)
+  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+  // Do nothing
+}
+
+void OneHot::configure()
+{
+  // check types
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape dependent parameters
+  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+  // define parameters that affect the output shape
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const &input_shape = indices()->shape();
+  auto const input_dims = input_shape.num_dims();
+  auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+  // define output shape
+  Shape output_shape(input_shape.num_dims() + 1);
+  {
+    for (int32_t d = 0; d < axis; ++d)
+      output_shape.dim(d) = input_shape.dim(d);
+
+    output_shape.dim(axis) = depth_value;
+
+    for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+      output_shape.dim(d) = input_shape.dim(d - 1);
+  }
+
+  // reshape output
+  output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  auto const axis = params().axis;
+
+  switch (output()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case loco::DataType::U8:
+      OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    case loco::DataType::S16:
+      OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+      break;
+    default:
+      // TODO Support other data types
+      throw std::runtime_error("Not supported, yet!");
+      break;
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-interpreter/src/kernels/OneHot.h
new file mode 100644 (file)
index 0000000..572f857
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class OneHot : public KernelWithParams<OneHotParams>
+{
+public:
+  OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+         const Tensor *off_value, Tensor *output, const OneHotParams &params);
+
+  const Tensor *indices() const { return _inputs[0]; }
+  const Tensor *depth() const { return _inputs[1]; }
+  const Tensor *on_value() const { return _inputs[2]; }
+  const Tensor *off_value() const { return _inputs[3]; }
+
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644 (file)
index 0000000..45b6968
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+           std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+           int32_t axis, std::initializer_list<T2> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  constexpr auto input_type = getElementType<T1>();
+  constexpr auto output_type = getElementType<T2>();
+
+  Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+  Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+  Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(output_type);
+
+  OneHotParams params{};
+  params.axis = axis;
+
+  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+                params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+  EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class OneHotTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
+TYPED_TEST(OneHotTest, BasicPattern)
+{
+  // axis 0
+  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
+                            /*input_data=*/
+                            {
+                              0, 3, 5, //
+                              7, 3, 0, //
+                            },
+                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+                            /*axis=*/0,
+                            /*output_data=*/
+                            {
+                              1, 0, 0, //
+                              0, 0, 1, //
+
+                              0, 0, 0, //
+                              0, 0, 0, //
+
+                              0, 0, 0, //
+                              0, 0, 0, //
+
+                              0, 1, 0, //
+                              0, 1, 0, //
+                            });
+  // axis 1
+  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
+                            /*input_data=*/
+                            {
+                              0, 3, 5, //
+                              7, 3, 0, //
+                            },
+                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+                            /*axis=*/1,
+                            /*output_data=*/
+                            {
+                              1, 0, 0, //
+                              0, 0, 0, //
+                              0, 0, 0, //
+                              0, 1, 0, //
+
+                              0, 0, 1, //
+                              0, 0, 0, //
+                              0, 0, 0, //
+                              0, 1, 0, //
+                            });
+  // axis -1
+  Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
+                            /*input_data=*/
+                            {
+                              0, 3, 5, //
+                              7, 3, 0, //
+                            },
+                            /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+                            /*axis=*/-1,
+                            /*output_data=*/
+                            {
+                              1, 0, 0, 0, //
+                              0, 0, 0, 1, //
+                              0, 0, 0, 0, //
+
+                              0, 0, 0, 0, //
+                              0, 0, 0, 1, //
+                              1, 0, 0, 0, //
+                            });
+}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  // input type should be integer
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+  Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+  Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  OneHotParams params = {-1};
+
+  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+                params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+  // type of on_value, off_value and output_tensor should be same
+  Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+  Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+  OneHotParams params = {-1};
+
+  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+                params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+  Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+  Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+  Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  // axis should be in [-1, input_shape.rank]
+  OneHotParams params = {-2};
+
+  OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+                params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index 90a0f894e5cbb82931739fe38114a00b09bc4e24..2404e43033c173c227bab0de330eb93e26cb1077 100644 (file)
@@ -80,7 +80,7 @@ template <typename T> class PackTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<uint8_t, float>;
-TYPED_TEST_CASE(PackTest, DataTypes);
+TYPED_TEST_SUITE(PackTest, DataTypes);
 
 TYPED_TEST(PackTest, ThreeInputs)
 {
index 700448e7a2412465429f1542ce98aad8849d4966..fe172884b43c3915c37bc528f01ebe85241fe581 100644 (file)
@@ -93,6 +93,16 @@ void Pad::execute() const
                                  getTensorData<uint8_t>(output()));
       break;
     }
+    case DataType::S8:
+    {
+      assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+      assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+      const auto pad_value = static_cast<int8_t>(output()->zero_point());
+      tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+                                 &pad_value, getTensorShape(output()),
+                                 getTensorData<int8_t>(output()));
+      break;
+    }
     default:
       throw std::runtime_error("Unsupported type.");
   }
index 7994263e228b0cc014c93fe023c4fcf5c1a4ab6f..dd3ce947c0ea9bd093037734b2e8339ec84b22d4 100644 (file)
@@ -54,6 +54,32 @@ TEST(Pad, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
 }
 
+TEST(Pad, Int8)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f);
+  std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+  std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0};
+  Tensor input_tensor = makeInputTensor<DataType::S8>(
+    {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+  Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0, 0, 0,    0,    0,    0, 0, 0, -0.2, 0.4, 0.5, 0,
+                                     0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7,  0.1, 0.2, 0,
+                                     0, 0, 0,    0,    0,    0, 0, 0, 0,    0,   0,   0};
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, kQuantizedTolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
 TEST(Pad, Float)
 {
   std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644 (file)
index 0000000..0c8544a
--- /dev/null
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+  int32_t multiplier;
+  int shift;
+
+  const double effective_output_scale = input->scale() / output->scale();
+  quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+  const auto input_shape = getTensorShape(input);
+  const auto output_shape = getTensorShape(output);
+  const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+  const auto input_data = getTensorData<input_dtype>(input);
+
+  switch (output->element_type())
+  {
+    case loco::DataType::S8:
+      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+                                       output->zero_point(), getTensorData<int8_t>(output));
+      break;
+    case loco::DataType::U8:
+      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+                                       output->zero_point(), getTensorData<uint8_t>(output));
+      break;
+    case loco::DataType::S16:
+      luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+                                       output->zero_point(), getTensorData<int16_t>(output));
+      break;
+    default:
+      throw std::runtime_error("Unsupported quantized type, yet!");
+  }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+
+  if (input()->element_type() == loco::DataType::S16)
+    LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+  switch (input()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+                             output()->element_type() == loco::DataType::S8 ||
+                             output()->element_type() == loco::DataType::S16);
+      break;
+    }
+    case loco::DataType::S16:
+    case loco::DataType::S8:
+    case loco::DataType::U8:
+    {
+      LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+                             output()->element_type() == loco::DataType::U8 ||
+                             output()->element_type() == loco::DataType::S16);
+      if (output()->element_type() == loco::DataType::S16)
+      {
+        LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+      }
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type");
+  }
+
+  output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+  switch (input()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      tflite::QuantizationParams op_params;
+      op_params.zero_point = output()->zero_point();
+      op_params.scale = output()->scale();
+      const auto input_data = getTensorData<float>(input());
+
+      switch (output()->element_type())
+      {
+        case loco::DataType::S8:
+        {
+          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+                                         getTensorShape(output()), getTensorData<int8_t>(output()));
+          break;
+        }
+        case loco::DataType::U8:
+        {
+          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+                                         getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
+          break;
+        }
+        case loco::DataType::S16:
+        {
+          luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+                                         getTensorShape(output()),
+                                         getTensorData<int16_t>(output()));
+          break;
+        }
+        default:
+          throw std::runtime_error("Unsupported type.");
+      }
+      break;
+    }
+    case loco::DataType::S16:
+    {
+      call_requantize<int16_t>(input(), output());
+      break;
+    }
+    case loco::DataType::S8:
+    {
+      call_requantize<int8_t>(input(), output());
+      break;
+    }
+    case loco::DataType::U8:
+    {
+      call_requantize<uint8_t>(input(), output());
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-interpreter/src/kernels/Quantize.h
new file mode 100644 (file)
index 0000000..006c536
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel for the (Re)Quantize operation: a single input tensor and a single
+// quantized output tensor (quantization parameters live on the output).
+class Quantize : public Kernel
+{
+public:
+  Quantize(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644 (file)
index 0000000..22e67fe
--- /dev/null
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Common fixture: provides a fresh TestMemoryManager for every test case.
+class QuantizeTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// float -> uint8 quantization (scale 0.5, zero_point 127).
+TEST_F(QuantizeTest, FloatUint8)
+{
+  std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+  std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+// float -> int8 quantization (scale 0.5, zero_point -1).
+TEST_F(QuantizeTest, FloatInt8)
+{
+  std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+  std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+// float -> int16 quantization (scale 0.005, zero_point 0).
+TEST_F(QuantizeTest, FloatInt16)
+{
+  std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+  std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400,  -200,
+                                       200,    400,    600,  12700, 12800};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+// int16 -> int16 requantization: halving the scale doubles the stored values.
+TEST_F(QuantizeTest, Int16Int16)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+  Tensor input_tensor = makeInputTensor<DataType::S16>(
+    {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+// int8 -> int8 with identical quantization parameters: values pass through.
+TEST_F(QuantizeTest, Int8Int8)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+  Tensor input_tensor = makeInputTensor<DataType::S8>(
+    {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+// uint8 -> uint8 with identical quantization parameters: values pass through.
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+// int16 -> int8 requantization into a different scale/zero_point.
+TEST_F(QuantizeTest, Int16Int8)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+  Tensor input_tensor = makeInputTensor<DataType::S16>(
+    {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+              ::testing::ElementsAreArray(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+// S32 is not a supported input type: configure must throw.
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Float input must quantize to a quantized type; FLOAT32 output is rejected.
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// S16 input cannot be requantized to FLOAT32.
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// S8 input cannot be requantized to FLOAT32.
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// U8 input cannot be requantized to S32.
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Configure is expected to reject a non-zero zero_point on an S16 input.
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+  std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+  Quantize kernel(&input_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index 7af20f8c4bf294513981e637b32f28e9ead792de..933a1128c6fad33ada70ab95e7c221c92f330c0e 100644 (file)
@@ -90,7 +90,7 @@ template <typename T> class ResizeBilinearTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeBilinearTest, DataTypes);
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
 
 TYPED_TEST(ResizeBilinearTest, SimpleTest)
 {
index 0e9017c78e5a880076234d89275b0eb6bd757462..7ade02a6feedee34a8ae2221fa869a0e84d42a32 100644 (file)
@@ -92,7 +92,7 @@ template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeNearestNeighborTest, DataTypes);
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
 
 TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
 {
index 2bd94875bcfff77275be23e587b579eaf60e78fb..c0025faca73ca4fe844c0645c2eb92e096728b06 100644 (file)
@@ -33,7 +33,7 @@ template <typename T> class ReverseV2Test : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ReverseV2Test, DataTypes);
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
 
 TYPED_TEST(ReverseV2Test, MultiDimensions)
 {
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644 (file)
index 0000000..40d79aa
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+// Map a luci FusedActFunc onto the corresponding TfLiteFusedActivation
+// constant; throws for any value not listed below.
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+  switch (activation)
+  {
+    case luci::FusedActFunc::RELU:
+      return kTfLiteActRelu;
+    case luci::FusedActFunc::RELU6:
+      return kTfLiteActRelu6;
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      return kTfLiteActReluN1To1;
+    case luci::FusedActFunc::TANH:
+      return kTfLiteActTanh;
+    case luci::FusedActFunc::SIGN_BIT:
+      return kTfLiteActSignBit;
+    case luci::FusedActFunc::NONE:
+      return kTfLiteActNone;
+    default:
+      throw std::runtime_error("Unsupported activation type");
+  }
+}
+} // namespace
+
+// Constructor. The scratchpad tensors are registered as additional outputs
+// (output indices 1..7); configure() resizes them (directly and via
+// SetupScratchpadTensor). Only output index 0 is the user-visible result.
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+           const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+           Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+           Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+           const SVDFParams &params)
+  : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+                                 {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+                                  scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+                                 params)
+{
+  // Do nothing
+}
+
+// Validate tensor types and shapes, resize the activation-state scratchpad
+// and the output tensor, and let the PAL set up the remaining scratchpads.
+void SVDF::configure()
+{
+  const Shape &input_shape = input()->shape();
+  const Shape &weight_features_shape = weight_feature()->shape();
+  const Shape &weight_time_shape = weight_time()->shape();
+
+  // Validate Input Tensor:
+  LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+                         input()->element_type() == loco::DataType::S8);
+  LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+  // Validate inputs and output types
+  if (input()->element_type() == loco::DataType::S8)
+  {
+    // Full-integer path.
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+                           weight_time()->element_type() == loco::DataType::S8);
+    if (bias())
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+                           input_activation_state()->element_type() == loco::DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+    // Note: now tflite support only ReLU activation for integer SVDF
+    LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+  }
+  else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+  {
+    // Float path (input is FLOAT32 here; the S8 case was handled above).
+    // NOTE(review): the first check below repeats the branch condition.
+    LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+    if (bias())
+      LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+  }
+  else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+            weight_feature()->element_type() == loco::DataType::S8) &&
+           input()->element_type() == loco::DataType::FLOAT32)
+  {
+    // TODO: support hybrid SVDF op
+    throw std::runtime_error("Hybrid type is not currently supported");
+  }
+  else
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  // Check all the parameters of tensor match within themselves and match the
+  // input configuration.
+  const int rank = params().svdf_rank;
+  const int batch_size = input_shape.dim(0);
+  const int num_filters = weight_features_shape.dim(0);
+  LUCI_INTERPRETER_CHECK(rank != 0);
+  LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+  const int num_units = num_filters / rank;
+  const int memory_size = weight_time_shape.dim(1);
+
+  // Validate Weight_Feature Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+  // Validate Weight_Time Input Tensor:
+  LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+  // Validate Bias
+  if (bias())
+    LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+  // Validate Input Activation State
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+  LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize scratchpad_state to input_activation_state
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+  // Resize output tensor
+  output()->resize({batch_size, num_units});
+
+  // Platform-specific sizing of scratchpad tensors 2..7.
+  luci_interpreter_pal::SetupScratchpadTensor(
+    input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+    getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+    getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+// Dispatch on the weight_feature element type: float path, full-integer path
+// (int8 input), or reject the hybrid (float input / quantized weights) case.
+void SVDF::execute() const
+{
+  switch (weight_feature()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+      evalFloat();
+      break;
+    case loco::DataType::S8:
+    {
+      if (input()->element_type() == loco::DataType::S8)
+        evalInteger();
+      else
+        // TODO: support hybrid SVDF op
+        throw std::runtime_error("Hybrid type is not currently supported");
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type");
+  }
+}
+
+// Full-integer (int8) SVDF evaluation. Effective multipliers are derived from
+// the input/weight/state/output scales and converted to fixed-point
+// (multiplier, shift) pairs via tflite::QuantizeMultiplier before the PAL call.
+void SVDF::evalInteger() const
+{
+  const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+                                                     input_activation_state()->scale());
+  const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+                                                     weight_time()->scale() / output()->scale());
+
+  int32_t effective_scale_1_a;
+  int effective_scale_1_b;
+  int32_t effective_scale_2_a;
+  int effective_scale_2_b;
+
+  tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+  tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
+
+  TfLiteSVDFParams params_svdf{};
+  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  params_svdf.rank = params().svdf_rank;
+  params_svdf.activation = get_tflite_activation(params().activation);
+
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: it is expected that activation_state input variable tensor reset to zero,
+  // also expected that this variable tensor doesn't have buffer
+  auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+  auto scratchpad = getOutputTensors()[2];
+  auto output_temp = getOutputTensors()[3];
+
+  int32_t input_zp = input()->zero_point();
+  int32_t output_zp = output()->zero_point();
+  luci_interpreter_pal::IntegerSVDF(
+    params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+    getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+    getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+    getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+    effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+// Float SVDF evaluation: zero-fill the activation-state scratchpad, then
+// delegate to the PAL FloatSVDF implementation.
+void SVDF::evalFloat() const
+{
+  TfLiteSVDFParams params_svdf{};
+  params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  params_svdf.rank = params().svdf_rank;
+  params_svdf.activation = get_tflite_activation(params().activation);
+
+  auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: it is expected that activation_state input variable tensor reset to zero,
+  // also expected that this variable tensor doesn't have buffer
+  auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+  std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+  auto scratchpad_1 = getOutputTensors()[2];
+
+  luci_interpreter_pal::FloatSVDF(
+    params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+    getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-interpreter/src/kernels/SVDF.h
new file mode 100644 (file)
index 0000000..335a6cd
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SVDF kernel: takes input, weights, optional bias and an activation-state
+// tensor; besides the result it owns several scratchpad output tensors
+// (see the constructor in SVDF.cpp).
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+  SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+       const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+       Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+       Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+       const SVDFParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *weight_feature() const { return _inputs[1]; }
+  const Tensor *weight_time() const { return _inputs[2]; }
+  const Tensor *bias() const { return _inputs[3]; }
+  const Tensor *input_activation_state() const { return _inputs[4]; }
+
+  // Output 0 is the user-visible result; _outputs[1..7] are scratchpads.
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644 (file)
index 0000000..82bd9b0
--- /dev/null
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Common fixture: gives every test case a fresh TestMemoryManager so tensor
+// allocations from one test cannot leak into another.
+class SVDFTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Fully-quantized SVDF path: S8 input/weight_feature/output, S16 weight_time
+// and activation state, S32 bias. Reference outputs below are pre-computed —
+// presumably from TFLite's integer SVDF reference implementation; verify if changed.
+TEST_F(SVDFTest, FullIntegerTest)
+{
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape bias_shape{units};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+                                0.17660543, 0.52949083, -0.77931279};
+
+  std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667,   0.37613347,
+                                         0.22197971,  0.12416199,  0.27901134,  0.27557442,
+                                         0.3905206,   -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector<float> weight_time_data{
+    -0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+    0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+    0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+    -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+    -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+    0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657};
+
+  std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+  // Per-tensor quantization (scale, zero-point) derived from each float range.
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+  std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+  std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+  std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+  std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+    weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+    weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+    weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+    weight_time_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(
+    bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(
+    DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  // Scratchpads start shapeless; the kernel is expected to size them in configure().
+  Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::RELU;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad_activation_state);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  _memory_manager->allocate_memory(scratchpad_4);
+  _memory_manager->allocate_memory(scratchpad_5);
+  _memory_manager->allocate_memory(scratchpad_6);
+  kernel.execute();
+
+  // Exact int8 match is required — integer kernels must be bit-deterministic.
+  std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+  std::vector<int32_t> ref_output_shape{batches, units};
+  EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Float SVDF path. Note: bias is omitted (nullptr), exercising the optional-bias
+// branch of the kernel.
+TEST_F(SVDFTest, FloatTest)
+{
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+                                0.35867718, 0.36897406,  0.73463392};
+
+  std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667,   0.37613347,
+                                         0.22197971,  0.12416199,  0.27901134,  0.27557442,
+                                         0.3905206,   -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector<float> weight_time_data{
+    -0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+    0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+    0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+    -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+    -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+    0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  // Scratchpads start shapeless; the kernel is expected to size them in configure().
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad_activation_state);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  _memory_manager->allocate_memory(scratchpad_4);
+  _memory_manager->allocate_memory(scratchpad_5);
+  _memory_manager->allocate_memory(scratchpad_6);
+  kernel.execute();
+
+  // Float comparison is tolerance-based, unlike the exact integer check above.
+  std::vector<float> ref_output_data{0.014899,    -0.0517661, -0.143725, -0.00271883,
+                                     -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+  std::vector<float> ref_output_shape{batches, units};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+// Negative test: S32 input with float weights is not a supported SVDF type
+// combination, so configure() must throw.
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+  std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667,   0.37613347,
+                                         0.22197971,  0.12416199,  0.27901134,  0.27557442,
+                                         0.3905206,   -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector<float> weight_time_data{
+    -0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+    0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+    0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+    -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+    -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+    0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657};
+
+  // Input tensor deliberately uses the unsupported S32 element type.
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative test: input's feature dimension (4) disagrees with weight_feature's
+// expected input size (3), so configure() must throw.
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+  const int32_t batches = 2;
+  const int32_t right_input_size = 3;
+  const int32_t wrong_input_size = 4;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  // Mismatch under test: input uses wrong_input_size, weights use right_input_size.
+  Shape input_shape{batches, wrong_input_size};
+  Shape weight_feature_shape{num_filters, right_input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+  std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667,   0.37613347,
+                                         0.22197971,  0.12416199,  0.27901134,  0.27557442,
+                                         0.3905206,   -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector<float> weight_time_data{
+    -0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+    0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+    0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+    -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+    -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+    0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index 37a834a18debcea357e7175d28aa423cf5f38be7..2fe2c5471e0dc0aec6cb3216959527b045854947 100644 (file)
@@ -139,6 +139,11 @@ void Slice::execute() const
                                   getTensorData<uint8_t>(input()), getTensorShape(output()),
                                   getTensorData<uint8_t>(output()));
       break;
+    case DataType::S8:
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<int8_t>(input()), getTensorShape(output()),
+                                  getTensorData<int8_t>(output()));
+      break;
     default:
       throw std::runtime_error("Unsupported input type.");
   }
index 3e0d0b0d75fe82e65370636a74893efc15d1e19c..517982990c8c154ef980261544b9fdf9b32f61c7 100644 (file)
@@ -31,8 +31,8 @@ template <typename T> class SliceTest : public ::testing::Test
 {
 };
 
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SliceTest, DataTypes);
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
 
 TYPED_TEST(SliceTest, SimpleTest)
 {
index 9de40b6ec6dd70b262b0ca6f48e262e8723a12c7..08e70672d1682d7d5eff8abf4ca5d29a7b45b0f6 100644 (file)
@@ -93,7 +93,7 @@ template <typename T> class SoftmaxTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_CASE(SoftmaxTest, DataTypes);
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
 
 TYPED_TEST(SoftmaxTest, Simple)
 {
index e06501c8ce537b9bedb07a2f4ad8a0bf2947b863..3a8b0a812b67b17cee6f38e55958a6a23e4f8daf 100644 (file)
@@ -90,7 +90,7 @@ template <typename T> class SpaceToBatchNDTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToBatchNDTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
 
 TYPED_TEST(SpaceToBatchNDTest, Simple)
 {
index 735c010b9fff585586cc80a644515782252f61ce..4af48861873c9e6a1f0655b5bc5b3fdf6f03acae 100644 (file)
@@ -32,7 +32,7 @@ template <typename T> class SpaceToDepthTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
 
 TYPED_TEST(SpaceToDepthTest, SimpleCase)
 {
index 74d57aed31692f676f414d32641bf69d7fd08b1e..283cd9aa96ea541d22584ca0da5e65dfbb8f4d8d 100644 (file)
@@ -73,7 +73,7 @@ template <typename T> class SplitTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SplitTest, DataTypes);
+TYPED_TEST_SUITE(SplitTest, DataTypes);
 
 TYPED_TEST(SplitTest, FourDimensional)
 {
index aac0567d7cc8aad245a0c965ee193827ac462557..035bc2122fbf02ec2d27659d84c524245fa41667 100644 (file)
@@ -77,7 +77,7 @@ template <typename T> class SplitVTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
-TYPED_TEST_CASE(SplitVTest, DataTypes);
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
 
 TYPED_TEST(SplitVTest, ThreeDimensional)
 {
index d3326fe9816044565db45fe27d1faf0ea792b91d..1bc0b6459e28fe24874af2379041f0a620c0060c 100644 (file)
@@ -56,7 +56,7 @@ template <typename T> class SqueezeTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SqueezeTest, DataTypes);
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
 
 TYPED_TEST(SqueezeTest, TotalTest)
 {
index 603c62d0fc2b645c91b8713301c953539f55cb48..24b6a72e5a5cd63ae919b539d012db08d8d508cb 100644 (file)
@@ -37,6 +37,7 @@ Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubPa
 void Sub::configure()
 {
   LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
+  // Output element type must match the (already equal) input types.
+  LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
   output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
 }
 
@@ -47,6 +48,12 @@ void Sub::execute() const
     case DataType::FLOAT32:
       evalFloat();
       break;
+    case DataType::S64:
+      evalInteger<int64_t>();
+      break;
+    case DataType::S32:
+      evalInteger<int32_t>();
+      break;
     case DataType::U8:
       evalQuantized();
       break;
@@ -57,13 +64,8 @@ void Sub::execute() const
 
 void Sub::evalFloat() const
 {
-  float activation_min{};
-  float activation_max{};
-  calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
   tflite::ArithmeticParams params{};
-  params.float_activation_min = activation_min;
-  params.float_activation_max = activation_max;
+  fillArithmeticActivationRange<float>(params, _params.activation);
 
   const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
     getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -82,6 +84,28 @@ void Sub::evalFloat() const
   }
 }
 
+// Element-wise integer subtraction (T is int32_t or int64_t, matching the
+// S32/S64 cases in execute()) with the fused activation range applied by the
+// TFLite reference kernels.
+template <typename T> void Sub::evalInteger() const
+{
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  // ProcessBroadcastShapes also fills params' broadcast metadata when shapes differ.
+  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+    getTensorShape(input1()), getTensorShape(input2()), &params);
+
+  if (need_broadcast)
+  {
+    tflite::reference_ops::BroadcastSubSlow(
+      params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+  }
+  else
+  {
+    tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+                               getTensorShape(input2()), getTensorData<T>(input2()),
+                               getTensorShape(output()), getTensorData<T>(output()));
+  }
+}
+
 void Sub::evalQuantized() const
 {
   const auto input1_scale = static_cast<double>(input1()->scale());
index d7940b5c667591680a68a7ecf1ef0404e94b896f..23952b3bdefe2077e50a4a267083ffbcfd76e6fc 100644 (file)
@@ -39,6 +39,7 @@ public:
 
 private:
   void evalFloat() const;
+  template <typename T> void evalInteger() const;
   void evalQuantized() const;
 };
 
index c189f44818582ea6bc05fab5475aacb89289ff93..9abafd49af19e91de041d630b430b123dc120625 100644 (file)
@@ -162,6 +162,51 @@ TEST_F(SubTest, Float)
   }
 }
 
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<dtype>> test_outputs = {
+    {0, 1, 2, 3, 0, 0, 0, 0, 4,  1, 0, 0, 0, 0, 7,  0, 3, 0,
+     0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+    {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+    {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+     2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+    {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+  std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+  std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    SubParams params{};
+    params.activation = Activation::RELU;
+
+    Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+};
+
+// Instantiates the shared integer check for 32-bit signed Sub.
+TEST_F(SubTest, SInt32)
+{
+  CheckInteger<loco::DataType::S32>(_memory_manager.get());
+  SUCCEED();
+}
+
+// Instantiates the shared integer check for 64-bit signed Sub.
+TEST_F(SubTest, SInt64)
+{
+  CheckInteger<loco::DataType::S64>(_memory_manager.get());
+  SUCCEED();
+}
+
 TEST_F(SubTest, Input_Output_Type_NEG)
 {
   Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
@@ -175,11 +220,24 @@ TEST_F(SubTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST_F(SubTest, Invalid_Input_Type_NEG)
+// Negative test: S64 inputs with an S32 output violate the input/output type
+// match enforced in Sub::configure(), which must therefore throw.
+TEST_F(SubTest, Invalid_Output_Type_NEG)
 {
   Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
   Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
-  Tensor output_tensor = makeOutputTensor(DataType::S64);
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  SubParams params{};
+  params.activation = Activation::RELU;
+
+  Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U64);
 
   SubParams params{};
   params.activation = Activation::RELU;
@@ -190,6 +248,19 @@ TEST_F(SubTest, Invalid_Input_Type_NEG)
   EXPECT_ANY_THROW(kernel.execute());
 }
 
+// Negative test: S32 vs S64 inputs fail the equal-input-types check in
+// Sub::configure(), which must throw.
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  SubParams params{};
+  params.activation = Activation::NONE;
+
+  Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
 } // namespace
 } // namespace kernels
 } // namespace luci_interpreter
index 1071799106e1736d18941d1e4222fab04b2d9184..43be8f8b95218e74ed8e688572d5a38bd3f10e71 100644 (file)
@@ -52,7 +52,7 @@ template <typename T> class TransposeTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(TransposeTest, DataTypes);
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
 
 TYPED_TEST(TransposeTest, Small3D)
 {
index 4f22c9f3058ce6d929f1597a33d4b2e5967bfdd7..9384ddc83a16a5a72e179b5578db18567e67ec1b 100644 (file)
@@ -75,7 +75,7 @@ template <typename T> class UnpackTest : public ::testing::Test
 };
 
 using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(UnpackTest, DataTypes);
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
 
 TYPED_TEST(UnpackTest, ThreeOutputs)
 {
index 586cfa1e14bbc6ffcf292e27b1b73e376c3761fd..5d8e5db8356c5316f091e7c4af0337ec39e5b9a3 100644 (file)
@@ -27,17 +27,18 @@ namespace luci_interpreter
 namespace kernels
 {
 
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max)
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
 {
   switch (activation)
   {
     case Activation::NONE:
-      *activation_min = std::numeric_limits<float>::lowest();
-      *activation_max = std::numeric_limits<float>::max();
+      *activation_min = std::numeric_limits<T>::lowest();
+      *activation_max = std::numeric_limits<T>::max();
       break;
     case Activation::RELU:
       *activation_min = 0;
-      *activation_max = std::numeric_limits<float>::max();
+      *activation_max = std::numeric_limits<T>::max();
       break;
     case Activation::RELU_N1_TO_1:
       *activation_min = -1;
@@ -52,6 +53,13 @@ void calculateActivationRange(Activation activation, float *activation_min, floa
   }
 }
 
+template void calculateActivationRange(Activation activation, float *activation_min,
+                                       float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+                                       int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+                                       int64_t *activation_max);
+
 static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
                                                   const Tensor *output, int32_t *activation_min,
                                                   int32_t *activation_max)
@@ -175,7 +183,11 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_
   {
     const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
     const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
-    assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1);
+
+    bool need_broadcast = input1_dim != input2_dim;
+    bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+    LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
     output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
   }
 
index 817a42f831a5f7c2cf71a5d16d16aee1403890bd..ebeb20e66a252995451f384a42ec8b28d3555846 100644 (file)
@@ -76,11 +76,42 @@ inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2
   return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
 }
 
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max);
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
 
 void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
                                        int32_t *activation_min, int32_t *activation_max);
 
+// Base case of the variadic check below: an empty candidate list never matches.
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {U,Other} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+  return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
+
+/**
+ * Fills activation min and max parameters depending on given data type and activation
+ *
+ * T is a template parameter, so after optimization this code left with only required if case
+ *
+ * @tparam T data type of arithmetic operation output tensor (float, int32_t or int64_t)
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  // The branches must be mutually exclusive ('else if'): with a plain second
+  // 'if', T == float would also take the final 'else' and spuriously write the
+  // int64 activation fields.
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
 // Decompose a double multiplier into a Q0.31 int32 representation of its
 // significand, and shift representation of its exponent.
 //
index 2cde99f5d934d00ffccfe0dc7c2f207643f61e12..2927715924bdc9f927124199443f1783451df011 100644 (file)
@@ -17,7 +17,9 @@ endmacro(REGISTER_KERNEL)
 include(${KERNEL_REGISTER_FILE})
 
 add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
 target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
 
index a14442ed5fb6b51b8af33f555c0aa809842a310c..dba39050c22c794ed1bdb1f3da45dbbb3782db1a 100644 (file)
@@ -73,6 +73,26 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
   }
 }
 
+// Returns a pointer to (and, via data_size, the byte size of) the constant data
+// referenced by a "CircleReferencingConst" custom node; returns nullptr for any
+// other custom code, leaving *data_size untouched.
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+  if (node->custom_code() != "CircleReferencingConst")
+    return nullptr;
+
+  // helper struct which describes data loaded to custom_options of CircleReferencingConst node
+  // TODO move this struct to header
+  struct ConstDataReference
+  {
+    const uint8_t *data = nullptr;
+    uint32_t size = 0;
+  };
+
+  // NOTE(review): the reinterpret_cast assumes custom_options really holds a
+  // ConstDataReference laid out by the producer and is suitably aligned —
+  // confirm against the code that writes these custom_options.
+  const auto &custom_options = node->custom_options();
+  const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+  *data_size = const_data_ref.size;
+  return const_data_ref.data;
+}
+
 bool isExecutableNode(const luci::CircleNode *node)
 {
   switch (node->opcode())
@@ -83,12 +103,30 @@ bool isExecutableNode(const luci::CircleNode *node)
     case luci::CircleOpcode::CIRCLEOUTPUT:
     case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
     // The following nodes denote outputs of multiple-output nodes.
+    case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+    case luci::CircleOpcode::CIRCLECUSTOMOUT:
     case luci::CircleOpcode::CIRCLEIFOUT:
+    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
     case luci::CircleOpcode::CIRCLESPLITOUT:
     case luci::CircleOpcode::CIRCLESPLITVOUT:
+    case luci::CircleOpcode::CIRCLETOPKV2OUT:
+    case luci::CircleOpcode::CIRCLEUNIQUEOUT:
     case luci::CircleOpcode::CIRCLEUNPACKOUT:
+    case luci::CircleOpcode::CIRCLEVARIABLE:
     case luci::CircleOpcode::CIRCLEWHILEOUT:
       return false;
+    // Custom nodes may be executable and non-executable
+    case luci::CircleOpcode::CUSTOM:
+    {
+      auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+      // TODO handle more non-executable Custom ops here
+      if (custom_node->custom_code() == "CircleReferencingConst")
+        return false;
+
+      return true;
+    }
     default:
       return true;
   }
@@ -102,15 +140,34 @@ bool isTensorProducingNode(const luci::CircleNode *node)
     case luci::CircleOpcode::CIRCLEOUTPUT:
     // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
     // are produced by the corresponding *Out nodes instead.
+    case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+    case luci::CircleOpcode::CUSTOM:
     case luci::CircleOpcode::IF:
+    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
     case luci::CircleOpcode::SPLIT:
+    case luci::CircleOpcode::SPLIT_V:
+    case luci::CircleOpcode::TOPK_V2:
+    case luci::CircleOpcode::UNIQUE:
     case luci::CircleOpcode::UNPACK:
+    case luci::CircleOpcode::WHILE:
       return false;
     default:
       return true;
   }
 }
 
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+  const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+  // TODO handle more Custom ops here
+  if (custom_node->custom_code() == "CircleReferencingConst")
+    return true;
+
+  return false;
+}
+
 } // namespace
 
 GraphLoader::GraphLoader(
@@ -129,18 +186,25 @@ void GraphLoader::loadTensors()
   {
     const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
 
+    if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+      throw std::runtime_error("Unknown Custom Node, yet.");
+
     if (!isTensorProducingNode(node))
       continue;
 
-    // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred.
+    // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+    // be inferred.
     Shape shape{};
-    if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node))
+    switch (node->opcode())
     {
-      shape = getNodeShape(input_node);
-    }
-    else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
-    {
-      shape = getNodeShape(const_node);
+      case luci::CircleOpcode::CIRCLECONST:
+      case luci::CircleOpcode::CIRCLECUSTOMOUT:
+      case luci::CircleOpcode::CIRCLEINPUT:
+      case luci::CircleOpcode::CIRCLEVARIABLE:
+        shape = getNodeShape(node);
+        break;
+      default:
+        break;
     }
 
     AffineQuantization quantization;
@@ -175,6 +239,22 @@ void GraphLoader::loadTensors()
         tensor->writeData(const_data, data_size);
       }
     }
+    else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+    {
+      const auto *custom_node =
+        loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+      if (custom_node->custom_code() == "CircleReferencingConst")
+      {
+        size_t data_size{};
+        const void *const_data = getNodeData(custom_node, &data_size);
+        if (const_data != nullptr)
+        {
+          _memory_manager->allocate_memory(*tensor);
+          tensor->writeData(const_data, data_size);
+        }
+      }
+    }
 
     _node_to_tensor.emplace(node, tensor.get());
     _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
index 7a457a62f9f257976aa5054393e28e9978654af7..b221b6921d5cee7cda4f8093861d4870d256320e 100644 (file)
@@ -21,6 +21,7 @@
 #include <kernels/Add.h>
 #include <kernels/ArgMax.h>
 #include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
 #include <kernels/Cast.h>
 #include <kernels/Concatenation.h>
 #include <kernels/Conv2D.h>
@@ -54,6 +55,7 @@
 #include <kernels/Mul.h>
 #include <kernels/Neg.h>
 #include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
 #include <kernels/Pad.h>
 #include <kernels/PadV2.h>
 #include <kernels/Pow.h>
@@ -209,6 +211,27 @@ TEST_F(KernelBuilderTest, AveragePool2D)
   EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
 }
 
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+  auto *lhs = createInputNode();
+  auto *rhs = createInputNode();
+
+  auto *op = createNode<luci::CircleBatchMatMul>();
+  op->x(lhs);
+  op->y(rhs);
+  op->adj_x(false);
+  op->adj_y(false);
+
+  auto kernel = buildKernel<kernels::BatchMatMul>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->x(), lhs);
+  checkTensor(kernel->y(), rhs);
+  checkTensor(kernel->output(), op);
+  EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+  EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
 TEST_F(KernelBuilderTest, Cast)
 {
   auto *input = createInputNode();
@@ -832,6 +855,31 @@ TEST_F(KernelBuilderTest, NotEqual)
   checkTensor(kernel->output(), op);
 }
 
+TEST_F(KernelBuilderTest, OneHot)
+{
+  auto *indices = createInputNode();
+  auto *depth = createInputNode();
+  auto *on_value = createInputNode();
+  auto *off_value = createInputNode();
+  auto axis = 1;
+
+  auto *op = createNode<luci::CircleOneHot>();
+  op->indices(indices);
+  op->depth(depth);
+  op->on_value(on_value);
+  op->off_value(off_value);
+  op->axis(axis);
+
+  auto kernel = buildKernel<kernels::OneHot>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->indices(), indices);
+  checkTensor(kernel->depth(), depth);
+  checkTensor(kernel->on_value(), on_value);
+  checkTensor(kernel->off_value(), off_value);
+  EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
 TEST_F(KernelBuilderTest, Pad)
 {
   auto *input = createInputNode();
index 5bc37bd4a10beabd060569663cfb8cfa8c018a97..efb011257d59c249d066a2db1c21442689f7ccd1 100644 (file)
@@ -17,6 +17,7 @@
 #include "Builders.h"
 
 #include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 
 namespace luci_interpreter
 {
@@ -40,7 +41,26 @@ std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode
   params.stride_width = node->stride()->w();
   params.activation = node->fusedActivationFunction();
 
-  return std::make_unique<kernels::AveragePool2D>(input, output, params);
+  // It is unknown what data will be stored in scratchpad tensor,
+  // using UINT8 as a most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+  scratchpad->set_observable(false);
+  scratchpad->set_data_buffer(nullptr);
+  // If node has execution plan then read memory offsets for scratchpad temporary tensor
+  // from the beginning of shared memory buffer.
+  // Used in Static Memory Manager.
+  // TODO move tensors offset initialization to one place
+  if (luci::has_execution_plan(node))
+  {
+    const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current CircleConv2D temporary was found.
+    if (execution_plan.offsets().size() > 1)
+      // If this is true, then we keep this offset in scratchpad.
+      scratchpad->set_offset(execution_plan.offsets().at(1));
+  }
+  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+  return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
 }
 
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644 (file)
index 0000000..aae3dba
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+                                                       KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *lhs = helper.getInputTensor(node->x());
+  const Tensor *rhs = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  auto lhs_scratchpad =
+    std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
+  lhs_scratchpad->set_observable(false);
+  lhs_scratchpad->set_data_buffer(nullptr);
+  auto rhs_scratchpad =
+    std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
+  rhs_scratchpad->set_observable(false);
+  rhs_scratchpad->set_data_buffer(nullptr);
+  // If node has execution plan then read memory offsets for scratchpad temporary tensor
+  // from the beginning of shared memory buffer.
+  // Used in Static Memory Manager.
+  // TODO move tensors offset initialization to one place
+  if (luci::has_execution_plan(node))
+  {
+    const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current BatchMatMul temporary was found.
+    if (execution_plan.offsets().size() > 1)
+    {
+      assert(execution_plan.offsets().size() == 3);
+
+      // If this is true, then we keep this offset in scratchpad.
+      lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
+      rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
+    }
+  }
+  Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
+  Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
+
+  BatchMatMulParams params;
+  params.adj_x = node->adj_x();
+  params.adj_y = node->adj_y();
+
+  return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
index 22fd1aca439e55fbabec54a50a4073213c4e2376..b48d97d1943fab43d547e7827c6f38a0aba144e2 100644 (file)
@@ -35,11 +35,12 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
   const Tensor *bias = helper.getOptionalInputTensor(node->bias());
   Tensor *output = helper.getOutputTensor(node);
 
-  auto im2col =
-    std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
-  im2col->set_observable(false);
-  im2col->set_data_buffer(nullptr);
-  // If node has execution plan then read memory offsets for im2col temporary tensor
+  // It is unknown what data will be stored in scratchpad tensor,
+  // using UINT8 as a most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+  scratchpad->set_observable(false);
+  scratchpad->set_data_buffer(nullptr);
+  // If node has execution plan then read memory offsets for scratchpad temporary tensor
   // from the beginning of shared memory buffer.
   // Used in Static Memory Manager.
   // TODO move tensors offset initialization to one place
@@ -48,10 +49,10 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
     const auto execution_plan = luci::get_execution_plan(node);
     // Check whether the offset for the current CircleConv2D temporary was found.
     if (execution_plan.offsets().size() > 1)
-      // If this is true, then we keep this offset in im2col.
-      im2col->set_offset(execution_plan.offsets().at(1));
+      // If this is true, then we keep this offset in scratchpad.
+      scratchpad->set_offset(execution_plan.offsets().at(1));
   }
-  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
+  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
 
   Conv2DParams params{};
   params.padding = node->padding();
index c2f0346a2ffa6a4b41b065a38639f3361c0e6a54..db26ecf2e864ba9a214fcac7970615cec576f966 100644 (file)
@@ -17,6 +17,7 @@
 #include "Builders.h"
 
 #include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 
 namespace luci_interpreter
 {
@@ -43,7 +44,26 @@ std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNod
   params.dilation_width_factor = node->dilation()->w();
   params.activation = node->fusedActivationFunction();
 
-  return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
+  // It is unknown what data will be stored in scratchpad tensor,
+  // using UINT8 as a most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+  scratchpad->set_observable(false);
+  scratchpad->set_data_buffer(nullptr);
+  // If node has execution plan then read memory offsets for scratchpad temporary tensor
+  // from the beginning of shared memory buffer.
+  // Used in Static Memory Manager.
+  // TODO move tensors offset initialization to one place
+  if (luci::has_execution_plan(node))
+  {
+    const auto execution_plan = luci::get_execution_plan(node);
+    // Check whether the offset for the current CircleConv2D temporary was found.
+    if (execution_plan.offsets().size() > 1)
+      // If this is true, then we keep this offset in scratchpad.
+      scratchpad->set_offset(execution_plan.offsets().at(1));
+  }
+  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+  return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
 }
 
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644 (file)
index 0000000..4aae564
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644 (file)
index 0000000..9840c34
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *axis = helper.getInputTensor(node->axis());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
index 2917598fcf2db7cb9d57df0fe1c29946858e55f4..0b8ac44bd5af82c911fd1fb2f0ef86b5b0615bde 100644 (file)
@@ -36,6 +36,7 @@ std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode
 
   FullyConnectedParams params{};
   params.activation = node->fusedActivationFunction();
+  params.keep_num_dims = node->keep_num_dims();
 
   return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
 }
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644 (file)
index 0000000..9df9775
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *params = helper.getInputTensor(node->params());
+  const Tensor *indices = helper.getInputTensor(node->indices());
+  Tensor *output = helper.getOutputTensor(node);
+
+  GatherParams gparams{};
+  gparams.axis = node->axis();
+  // TODO support batch_dims
+  gparams.batch_dims = 0;
+
+  return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644 (file)
index 0000000..a401609
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+  assert(node->arity() == 4);
+
+  const Tensor *indices = helper.getInputTensor(node->indices());
+  const Tensor *depth = helper.getInputTensor(node->depth());
+  const Tensor *on_value = helper.getInputTensor(node->on_value());
+  const Tensor *off_value = helper.getInputTensor(node->off_value());
+  Tensor *output = helper.getOutputTensor(node);
+
+  OneHotParams params{};
+  params.axis = node->axis();
+
+  return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644 (file)
index 0000000..fd98363
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644 (file)
index 0000000..89528d5
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSVDF *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *feature = helper.getInputTensor(node->weight_feature());
+  const Tensor *time = helper.getInputTensor(node->weight_time());
+  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+  const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
+  Tensor *output = helper.getOutputTensor(node);
+
+  auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
+                                                    Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  if (data_type == DataType::FLOAT32 &&
+      (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+  {
+    data_type = feature->element_type();
+  }
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  data_type = DataType::FLOAT32;
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+  scratchpad_tensor->set_observable(false);
+  scratchpad_tensor->set_data_buffer(nullptr);
+  Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+  SVDFParams params{};
+  params.activation = node->fusedActivationFunction();
+  params.svdf_rank = node->svdf_rank();
+  params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+  return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+                                         tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
index 94347082cd3575775a71cee54cef19c448739dab..c8a2e12e15eaeaf4fa6d74f3d4a79db8159d5dc7 100644 (file)
@@ -6,7 +6,7 @@ set(ARM_OBJCOPY "arm-none-eabi-objcopy")
 find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
 
 if(NOT ARM_C_COMPILER_PATH)
-  message(WARNING "ARM compiler is NOT FOUND, skipping luci-micro build")
+  message(STATUS "Build luci-micro: FALSE(ARM compiler is NOT FOUND)")
   return()
 endif()
 
index b3141587015a8a3faa5e30aab2c99facb257c9dc..034fe5269b857240e3548f6f13e643309d72137e 100644 (file)
@@ -1,3 +1,7 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 unset(TEST_DEPS)
 unset(LUCI_PASS_VALUE_TESTS)
 
@@ -38,7 +42,7 @@ add_test(NAME luci_pass_value_test
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
           "${CMAKE_CURRENT_BINARY_DIR}"
           "${ARTIFACTS_BIN_PATH}"
-          "${NNCC_OVERLAY_DIR}/venv_2_6_0"
+          "${NNCC_OVERLAY_DIR}/venv_2_8_0"
           "$<TARGET_FILE:luci_eval_driver>"
           ${LUCI_PASS_VALUE_TESTS}
 )
index c6005edfcaa0f89f393fafdb8a5abaac1503e4ef..0073c4db5aacbc67627249d6700e60b5bd9dde02 100644 (file)
@@ -22,6 +22,18 @@ circle_model = args.circle
 interpreter = tf.lite.Interpreter(tflite_model)
 interpreter.allocate_tensors()
 
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures != None:
+    signature_serving_default = full_signatures.get('serving_default', None)
+    if signature_serving_default != None:
+        signature_outputs = signature_serving_default['outputs']
+
+        full_signatures_outputs_remap = []
+        for index, (key, value) in enumerate(signature_outputs.items()):
+            full_signatures_outputs_remap.append(value)
+
 # Generate random input data.
 num_inputs = len(interpreter.get_input_details())
 for i in range(num_inputs):
@@ -33,6 +45,10 @@ for i in range(num_inputs):
         input_data = np.array(
             np.random.randint(0, 256, size=input_details["shape"]),
             input_details["dtype"])
+    elif input_details["dtype"] == np.int16:
+        input_data = np.array(
+            np.random.randint(0, 100, size=input_details["shape"]),
+            input_details["dtype"])
     elif input_details["dtype"] == np.bool_:
         input_data = np.array(
             np.random.choice(a=[True, False], size=input_details["shape"]),
@@ -55,48 +71,38 @@ subprocess.run(
     check=True)
 
 # Compare the results.
-for idx in range(len(interpreter.get_output_details())):
-    output_details = interpreter.get_output_details()[idx]
+inpt_output_details = interpreter.get_output_details()
+for idx in range(len(inpt_output_details)):
+    output_details = inpt_output_details[idx]
     output_data = np.fromfile(circle_model + ".output" + str(idx),
                               output_details["dtype"])
     shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
     output_shape = [int(i) for i in shape_file.read().split(',')]
     luci_output_data = np.reshape(output_data, output_shape)
+    output_tensor = output_details["index"]
+    if full_signatures_outputs_remap != None:
+        output_tensor = full_signatures_outputs_remap[idx]
+    intp_output_data = interpreter.get_tensor(output_tensor)
     try:
         if output_details["dtype"] == np.uint8:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         elif output_details["dtype"] == np.float32:
             if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=1.e-5,
-                    atol=1.e-5) == False:
+                    luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         elif output_details["dtype"] == np.int64:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         elif output_details["dtype"] == np.int32:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + tflite_model +
+                                 " does not match with " + circle_model)
+        elif output_details["dtype"] == np.int16:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         else:
index 9c408887d951baab6115f23448ac59867b5febb5..67476c6446236cb4184e4252d2503bf690b2b03a 100644 (file)
@@ -29,3 +29,7 @@ addeval(Net_InstanceNorm_001 fuse_instnorm)
 addeval(Net_InstanceNorm_002 fuse_instnorm)
 addeval(Net_InstanceNorm_003 fuse_instnorm)
 addeval(Net_StridedSlice_StridedSlice_000 remove_unnecessary_strided_slice)
+
+# test SignatureDef, with any optimization
+#addeval(SignatureDef_MultiOut_000 fuse_instnorm)
+#addeval(SignatureDef_MultiOut_001 fuse_instnorm)
index 3c7185b80ab2a155032787558300152ca346323a..ebf9c5926f4595f005087bc7df5640267f358482 100644 (file)
@@ -1,9 +1,18 @@
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 unset(LUCI_VALUE_TESTS)
+unset(LUCI_VALUE_TESTS_TOL)
 
 macro(addeval NAME)
   list(APPEND LUCI_VALUE_TESTS ${NAME})
 endmacro(addeval)
 
+macro(addevaltol NAME RTOL ATOL)
+  list(APPEND LUCI_VALUE_TESTS_TOL ${NAME} ${RTOL} ${ATOL})
+endmacro(addevaltol)
+
 # Read "test.lst"
 include("test.lst")
 # Read "test.local.lst" if exists
@@ -12,13 +21,60 @@ include("test.local.lst" OPTIONAL)
 # Generate dependencies
 add_custom_target(luci_eval_testfiles ALL DEPENDS ${TESTFILES})
 
-get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+if(NOT CMAKE_CROSSCOMPILING)
+
+  get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
+
+  add_test(NAME luci_value_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
+            "${CMAKE_CURRENT_BINARY_DIR}"
+            "${ARTIFACTS_BIN_PATH}"
+            "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+            "$<TARGET_FILE:luci_eval_driver>"
+            ${LUCI_VALUE_TESTS}
+  )
+
+  if(DEFINED LUCI_VALUE_TESTS_TOL)
+    add_test(NAME luci_value_tol_test
+      COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol.sh"
+              "${CMAKE_CURRENT_BINARY_DIR}"
+              "${ARTIFACTS_BIN_PATH}"
+              "${NNCC_OVERLAY_DIR}/venv_2_8_0"
+              "$<TARGET_FILE:luci_eval_driver>"
+              ${LUCI_VALUE_TESTS_TOL}
+    )
+  endif()
+
+else(NOT CMAKE_CROSSCOMPILING)
+  # NOTE target test is carried out using reference input/output data from host
+  #      test results. this is because it would be difficult to prepare
+  #      TensorFlow lite for target device.
+  #      thus, one must run the host test and then run the test in target device
+  #      with the test result files from the host test.
+
+  if(NOT DEFINED ENV{BUILD_HOST_EXEC})
+    message(STATUS "BUILD_HOST_EXEC not set: Skip luci-value-test")
+    return()
+  endif(NOT DEFINED ENV{BUILD_HOST_EXEC})
+
+  set(ARTIFACTS_BIN_PATH $ENV{BUILD_HOST_EXEC}/compiler/common-artifacts)
+
+  add_test(NAME luci_value_cross_test
+    COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify_ref.sh"
+            "${CMAKE_CURRENT_BINARY_DIR}"
+            "${ARTIFACTS_BIN_PATH}"
+            "$<TARGET_FILE:luci_eval_driver>"
+            ${LUCI_VALUE_TESTS}
+  )
+
+  if(DEFINED LUCI_VALUE_TESTS_TOL)
+    add_test(NAME luci_value_cross_tol_test
+             COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverifytol_ref.sh"
+                     "${CMAKE_CURRENT_BINARY_DIR}"
+                     "${ARTIFACTS_BIN_PATH}"
+                     "$<TARGET_FILE:luci_eval_driver>"
+                     ${LUCI_VALUE_TESTS_TOL}
+    )
+  endif()
 
-add_test(NAME luci_value_test
-  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
-          "${CMAKE_CURRENT_BINARY_DIR}"
-          "${ARTIFACTS_BIN_PATH}"
-          "${NNCC_OVERLAY_DIR}/venv_2_6_0"
-          "$<TARGET_FILE:luci_eval_driver>"
-          ${LUCI_VALUE_TESTS}
-)
+endif(NOT CMAKE_CROSSCOMPILING)
index 01c4bce463a408c072632eefc2126015ba347115..3d209117651e473789d0c18837d18c3781ebb7b5 100755 (executable)
@@ -4,10 +4,12 @@
 #
 # HOW TO USE
 #
-# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <TEST 1> <TEST 2> ...
+# ./evalverify.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \
+#                 <TEST 1> <TEST 2> ...
 # bin_dir  : build directory of luci-value-test (ex: build/compiler/luci-value-test)
 # work_dir : artifacts directory where test materials exist
 # venv_dir : python virtual environment home directory
+# eval_driver : luci_eval_driver path for evaluation
 
 VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
diff --git a/compiler/luci-value-test/evalverify_ref.sh b/compiler/luci-value-test/evalverify_ref.sh
new file mode 100755 (executable)
index 0000000..f1e538a
--- /dev/null
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverify_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \
+#                     <TEST 1> <TEST 2> ...
+# bin_dir  : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ref_dir  : artifacts directory where reference test materials exist
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py"
+BINDIR="$1"; shift
+REFDIR="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+for TESTCASE in "$@"; do
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${REFDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}.log" <(
+    exec 2>&1
+    set -ex
+
+    "python3" "${VERIFY_SCRIPT_PATH}" \
+    --driver "${INTERPRETER_DRIVER_PATH}" \
+    --model_ref "${TESTCASE_FILE}" \
+    --work_path "${TEST_RESULT_FILE}"
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("${TESTCASE}")
+  else
+    FAILED+=("${TESTCASE}")
+  fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/evalverifytol.sh b/compiler/luci-value-test/evalverifytol.sh
new file mode 100755 (executable)
index 0000000..9209405
--- /dev/null
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverifytol.sh <path/to/bin_dir> <path/to/work_dir> <path/to/venv_dir> <path/to/eval_driver> \
+#                    <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ...
+# bin_dir  : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# work_dir : artifacts directory where test materials exist
+# venv_dir : python virtual environment home directory
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier.py"
+BINDIR="$1"; shift
+WORKDIR="$1"; shift
+VIRTUALENV="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+while (( "$#" >= 3 )); do
+  TESTCASE=$1
+  RTOLERANCE=$2
+  ATOLERANCE=$3
+  shift 3
+
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}.log" <(
+    exec 2>&1
+    set -ex
+
+    source "${VIRTUALENV}/bin/activate"
+    "${VIRTUALENV}/bin/python" "${VERIFY_SCRIPT_PATH}" \
+    --driver "${INTERPRETER_DRIVER_PATH}" \
+    --model "${TESTCASE_FILE}" \
+    --rtolf32 "${RTOLERANCE}" \
+    --atolf32 "${ATOLERANCE}"
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("${TESTCASE}")
+  else
+    FAILED+=("${TESTCASE}")
+  fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/luci-value-test/evalverifytol_ref.sh b/compiler/luci-value-test/evalverifytol_ref.sh
new file mode 100755 (executable)
index 0000000..cc7267b
--- /dev/null
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# This script verifies the basic behavior of luci interpreter
+#
+# HOW TO USE
+#
+# ./evalverifytol_ref.sh <path/to/bin_dir> <path/to/ref_dir> <path/to/eval_driver> \
+#                        <TEST 1> <RTOL 1> <ATOL 1> <TEST 2> <RTOL 2> <ATOL 2> ...
+# bin_dir  : build directory of luci-value-test (ex: build/compiler/luci-value-test)
+# ref_dir  : artifacts directory where reference test materials exist
+# eval_driver : luci_eval_driver path for evaluation
+
+VERIFY_SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VERIFY_SCRIPT_PATH="${VERIFY_SOURCE_PATH}/luci_eval_verifier_ref.py"
+BINDIR="$1"; shift
+REFDIR="$1"; shift
+INTERPRETER_DRIVER_PATH="$1"; shift
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+while (( "$#" >= 3 )); do
+  TESTCASE=$1
+  RTOLERANCE=$2
+  ATOLERANCE=$3
+  shift 3
+
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${REFDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BINDIR}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}.log" <(
+    exec 2>&1
+    set -ex
+
+    "python3" "${VERIFY_SCRIPT_PATH}" \
+    --driver "${INTERPRETER_DRIVER_PATH}" \
+    --model_ref "${TESTCASE_FILE}" \
+    --work_path "${TEST_RESULT_FILE}" \
+    --rtolf32 "${RTOLERANCE}" \
+    --atolf32 "${ATOLERANCE}"
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("${TESTCASE}")
+  else
+    FAILED+=("${TESTCASE}")
+  fi
+done
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
index a76bd14039b23ca6f41fc74e9a9d08c699aa6c03..560e34fcafc60bd87b50e1ce06df5aad79322e59 100755 (executable)
@@ -14,16 +14,41 @@ import traceback
 parser = argparse.ArgumentParser()
 parser.add_argument('--driver', type=str, required=True)
 parser.add_argument('--model', type=str, required=True)
+parser.add_argument('--rtolf32', type=str, required=False)
+parser.add_argument('--atolf32', type=str, required=False)
 args = parser.parse_args()
 
 driver = args.driver
 tflite_model = args.model + ".tflite"
 circle_model = args.model + ".circle"
 
+rtolf32 = 1e-5
+atolf32 = 1e-5
+try:
+    if args.rtolf32 != None:
+        rtolf32 = float(args.rtolf32)
+    if args.atolf32 != None:
+        atolf32 = float(args.atolf32)
+except ValueError:
+    print("rtolf32 or atolf32 is not a number")
+    quit(128)
+
 # Build TFLite interpreter.
 interpreter = tf.lite.Interpreter(tflite_model)
 interpreter.allocate_tensors()
 
+# Read SignatureDef and get output tensor id orders for remapping
+full_signatures = interpreter._get_full_signature_list()
+full_signatures_outputs_remap = None
+if full_signatures != None:
+    signature_serving_default = full_signatures.get('serving_default', None)
+    if signature_serving_default != None:
+        signature_outputs = signature_serving_default['outputs']
+
+        full_signatures_outputs_remap = []
+        for index, (key, value) in enumerate(signature_outputs.items()):
+            full_signatures_outputs_remap.append(value)
+
 # Generate random input data.
 num_inputs = len(interpreter.get_input_details())
 for i in range(num_inputs):
@@ -31,19 +56,40 @@ for i in range(num_inputs):
     if input_details["dtype"] == np.float32:
         input_data = np.array(
             np.random.random_sample(input_details["shape"]), input_details["dtype"])
+        input_dtype = "float32"
     elif input_details["dtype"] == np.uint8:
         input_data = np.array(
             np.random.randint(0, 256, size=input_details["shape"]),
             input_details["dtype"])
+        input_dtype = "uint8"
+    elif input_details["dtype"] == np.int16:
+        input_data = np.array(
+            np.random.randint(0, 100, size=input_details["shape"]),
+            input_details["dtype"])
+        input_dtype = "int16"
+    elif input_details["dtype"] == np.int32:
+        input_data = np.array(
+            np.random.randint(0, 100, size=input_details["shape"]),
+            input_details["dtype"])
+        input_dtype = "int32"
+    elif input_details["dtype"] == np.int64:
+        input_data = np.array(
+            np.random.randint(0, 100, size=input_details["shape"]),
+            input_details["dtype"])
+        input_dtype = "int64"
     elif input_details["dtype"] == np.bool_:
         input_data = np.array(
             np.random.choice(a=[True, False], size=input_details["shape"]),
             input_details["dtype"])
+        input_dtype = "bool"
     else:
         raise SystemExit("Unsupported input dtype")
 
     interpreter.set_tensor(input_details["index"], input_data)
     input_data.tofile(circle_model + ".input" + str(i))
+    input_details["shape"].tofile(circle_model + ".input" + str(i) + ".shape", sep=',')
+    with open(circle_model + ".input" + str(i) + ".dtype", 'w') as dtype_file:
+        dtype_file.write(input_dtype)
 
 # Do inference
 interpreter.invoke()
@@ -57,34 +103,57 @@ subprocess.run(
     check=True)
 
 # Compare the results.
-for idx in range(len(interpreter.get_output_details())):
-    output_details = interpreter.get_output_details()[idx]
+inpt_output_details = interpreter.get_output_details()
+for idx in range(len(inpt_output_details)):
+    output_details = inpt_output_details[idx]
     output_data = np.fromfile(circle_model + ".output" + str(idx),
                               output_details["dtype"])
     shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
     output_shape = [int(i) for i in shape_file.read().split(',')]
     luci_output_data = np.reshape(output_data, output_shape)
-    intp_output_data = interpreter.get_tensor(output_details["index"])
+    output_tensor = output_details["index"]
+    if full_signatures_outputs_remap != None:
+        output_tensor = full_signatures_outputs_remap[idx]
+    intp_output_data = interpreter.get_tensor(output_tensor)
     try:
         if output_details["dtype"] == np.uint8:
             if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
+            output_dtype = "uint8"
         elif output_details["dtype"] == np.float32:
             if np.allclose(
-                    luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
+                    luci_output_data, intp_output_data, rtol=rtolf32,
+                    atol=atolf32) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
+            output_dtype = "float32"
         elif output_details["dtype"] == np.int64:
             if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
+            output_dtype = "int64"
         elif output_details["dtype"] == np.int32:
             if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
+            output_dtype = "int32"
+        elif output_details["dtype"] == np.int16:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + tflite_model +
+                                 " does not match with " + circle_model)
+            output_dtype = "int16"
+        elif output_details["dtype"] == np.bool_:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + tflite_model +
+                                 " does not match with " + circle_model)
+            output_dtype = "bool"
         else:
             raise SystemExit("Unsupported data type: ", output_details["dtype"])
+
+        # save outputN.dtype file
+        with open(circle_model + ".output" + str(idx) + ".dtype", 'w') as dtype_file:
+            dtype_file.write(output_dtype)
     except:
         print(traceback.format_exc())
         quit(255)
diff --git a/compiler/luci-value-test/luci_eval_verifier_ref.py b/compiler/luci-value-test/luci_eval_verifier_ref.py
new file mode 100755 (executable)
index 0000000..5313e33
--- /dev/null
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+import numpy as np
+import subprocess
+import argparse
+import traceback
+import os
+
+#
+# This script compares the execution result of luci-interpreter with that from ref_model path
+#
+# Basic usage:
+#   luci_eval_verifier_ref.py --driver build/compiler/luci-eval-driver/luci_eval_driver
+#           --ref_model ref_model_path --model this_model_path
+# Assumption:
+#   these files exist, each with the following purpose
+#   - ref_model_path.circle; circle model
+#   - ref_model_path.circle.inputN; N'th input numpy data
+#   - ref_model_path.circle.inputN.dtype; N'th input data type in text
+#   - ref_model_path.circle.inputN.shape; N'th input data shape in CSV
+#   - ref_model_path.circle.outputN; N'th output numpy data
+#   - ref_model_path.circle.outputN.dtype; N'th output data type in text
+#   - ref_model_path.circle.outputN.shape; N'th output data shape in CSV
+
+
+def dtype_from_file(file_path):
+    with open(file_path, 'r') as dtype_file:
+        dtype_str = dtype_file.read()
+    if dtype_str == "float32":
+        return np.float32
+    if dtype_str == "uint8":
+        return np.uint8
+    if dtype_str == "int16":
+        return np.int16
+    if dtype_str == "int32":
+        return np.int32
+    if dtype_str == "int64":
+        return np.int64
+    if dtype_str == "bool":
+        return np.bool_
+    raise SystemExit("Unsupported dtype from file", dtype_str)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--driver', type=str, required=True)
+parser.add_argument('--model_ref', type=str, required=True)
+parser.add_argument('--work_path', type=str, required=True)
+parser.add_argument('--rtolf32', type=str, required=False)
+parser.add_argument('--atolf32', type=str, required=False)
+args = parser.parse_args()
+
+driver = args.driver
+circle_model_ref = args.model_ref + ".circle"
+circle_model = args.work_path + ".circle"
+# circle_model is named this way to follow the convention of the existing luci_eval_verifier.py
+
+rtolf32 = 1e-5
+atolf32 = 1e-5
+try:
+    if args.rtolf32 != None:
+        rtolf32 = float(args.rtolf32)
+    if args.atolf32 != None:
+        atolf32 = float(args.atolf32)
+except ValueError:
+    print("rtolf32 or atolf32 is not a number")
+    quit(128)
+
+# get num of inputs by checking existence of model.inputN
+check_input = 0
+while True:
+    input_file_path = circle_model_ref + ".input" + str(check_input)
+    if not os.path.isfile(input_file_path):
+        num_inputs = check_input
+        break
+    check_input = check_input + 1
+
+if num_inputs == 0:
+    print("input file not exist for", circle_model_ref)
+    quit(128)
+
+# get num of outputs by checking existence of model.outputN
+check_output = 0
+while True:
+    output_file_path = circle_model_ref + ".output" + str(check_output)
+    if not os.path.isfile(output_file_path):
+        num_outputs = check_output
+        break
+    check_output = check_output + 1
+
+if num_outputs == 0:
+    print("output file not exist for", circle_model_ref)
+    quit(128)
+
+# Execute luci interpreter with reference input
+subprocess.run(
+    [
+        driver, circle_model_ref,
+        str(num_inputs), circle_model_ref + ".input", circle_model + ".output"
+    ],
+    check=True)
+
+# Compare the results.
+for idx in range(num_outputs):
+    output_dtype = dtype_from_file(circle_model_ref + ".output" + str(idx) + ".dtype")
+    shape_file = open(circle_model_ref + ".output" + str(idx) + ".shape", 'r')
+    output_shape = [int(i) for i in shape_file.read().split(',')]
+
+    output_data_ref = np.fromfile(circle_model_ref + ".output" + str(idx), output_dtype)
+    luci_output_data_ref = np.reshape(output_data_ref, output_shape)
+
+    output_data = np.fromfile(circle_model + ".output" + str(idx), output_dtype)
+    luci_output_data = np.reshape(output_data, output_shape)
+
+    try:
+        if output_dtype == np.uint8:
+            if np.allclose(
+                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + circle_model_ref +
+                                 " does not match with " + circle_model)
+        elif output_dtype == np.float32:
+            if np.allclose(
+                    luci_output_data, luci_output_data_ref, rtol=rtolf32,
+                    atol=atolf32) == False:
+                raise SystemExit("Execution result of " + circle_model_ref +
+                                 " does not match with " + circle_model)
+        elif output_dtype == np.int64:
+            if np.allclose(
+                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + circle_model_ref +
+                                 " does not match with " + circle_model)
+        elif output_dtype == np.int32:
+            if np.allclose(
+                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + circle_model_ref +
+                                 " does not match with " + circle_model)
+        elif output_dtype == np.int16:
+            if np.allclose(
+                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + circle_model_ref +
+                                 " does not match with " + circle_model)
+        elif output_dtype == np.bool_:
+            if np.allclose(
+                    luci_output_data, luci_output_data_ref, rtol=0, atol=0) == False:
+                raise SystemExit("Execution result of " + circle_model_ref +
+                                 " does not match with " + circle_model)
+        else:
+            raise SystemExit("Unsupported data type: ", output_dtype)
+    except:
+        print(traceback.format_exc())
+        quit(255)
+
+quit(0)
index 2b5c93fa3d81cf33200b57831efc255b526e3e60..f62b72919a28b5ea51f18c6a699cd24f48c8a6d9 100644 (file)
@@ -20,90 +20,90 @@ addeval(ArgMax_U8_003)
 #addeval(ArgMin_U8_002)
 #addeval(ArgMin_U8_003)
 addeval(AveragePool2D_000)
-#addeval(BatchMatMul_000)
+addeval(BatchMatMul_000)
 #addeval(BatchMatMulV2_000)
 #addeval(BatchMatMulV2_001)
 #addeval(BatchToSpaceND_000)
-#addeval(Cast_000)
-#addeval(Cast_001)
+addeval(Cast_000)
+addeval(Cast_001)
 #addeval(Ceil_000)
 addeval(Concatenation_000)
 addeval(Concatenation_U8_000)
 addeval(Conv2D_000)
 addeval(Conv2D_001)
 addeval(Conv2D_002)
-#addeval(Conv2D_003)
+addeval(Conv2D_003)
 addeval(Conv2D_U8_000)
 addeval(Conv2D_U8_001)
 #addeval(Cos_000)
-#addeval(DepthToSpace_000)
+addeval(DepthToSpace_000)
 addeval(DepthwiseConv2D_000)
 addeval(DepthwiseConv2D_U8_000)
 #addeval(DepthwiseConv2D_U8_001)
 addeval(DepthwiseConv2D_001)
-#addeval(Div_000)
+addeval(Div_000)
 addeval(ELU_000)
-#addeval(Equal_000)
-#addeval(Exp_000)
+addeval(Equal_000)
+addeval(Exp_000)
 #addeval(ExpandDims_000)
 #addeval(ExpandDims_001)
 #addeval(ExpandDims_002)
 #addeval(ExpandDims_003)
 #addeval(Fill_000)
 #addeval(Fill_001)
-#addeval(Floor_000)
-#addeval(FloorDiv_000)
-#addeval(FloorDiv_001)
+addeval(Floor_000)
+addeval(FloorDiv_000)
+addeval(FloorDiv_001)
 #addeval(FloorMod_000)
 #addeval(FloorMod_001)
 addeval(FullyConnected_000)
 addeval(FullyConnected_001)
 addeval(FullyConnected_002)
 #addeval(FullyConnected_U8_000)
-#addeval(Gather_000)
+addeval(Gather_000)
 #addeval(GatherNd_000)
 #addeval(Greater_000)
-#addeval(GreaterEqual_000)
+addeval(GreaterEqual_000)
 addeval(If_000)
 addeval(If_001)
 addeval(L2Normalize_000)
 addeval(L2Pool2D_000)
 #addeval(L2Pool2D_U8_000)
 addeval(LeakyRelu_000)
-#addeval(Less_000)
-#addeval(LessEqual_000)
+addeval(Less_000)
+addeval(LessEqual_000)
 addeval(LocalResponseNormalization_000)
 #addeval(Log_000)
-#addeval(LogicalAnd_000)
-#addeval(LogicalNot_000)
-#addeval(LogicalOr_000)
+addeval(LogicalAnd_000)
+addeval(LogicalNot_000)
+addeval(LogicalOr_000)
 addeval(Logistic_000)
-#addeval(LogSoftmax_000)
+addeval(LogSoftmax_000)
 #addeval(MatMul_000)
 #addeval(MatrixDiag_000)
 #addeval(MatrixSetDiag_000)
-#addeval(Maximum_000)
+addeval(Maximum_000)
 addeval(MaxPool2D_000)
 addeval(MaxPool2D_U8_000)
 addeval(Mean_000)
 addeval(Mean_001)
-#addeval(Mean_U8_000)
-#addeval(Minimum_000)
+addeval(Mean_U8_000)
+addeval(Minimum_000)
 #addeval(MirrorPad_000)
 addeval(Mul_000)
 #addeval(Mul_U8_000)
-#addeval(Neg_000)
-#addeval(NotEqual_000)
-#addeval(OneHot_000)
-#addeval(OneHot_001)
-#addeval(OneHot_002)
+addeval(Neg_000)
+addeval(NotEqual_000)
+addeval(OneHot_000)
+addeval(OneHot_001)
+addeval(OneHot_002)
 #addeval(OneHot_003)
-#addeval(Pack_000)
-#addeval(Pack_U8_000)
+addeval(Pack_000)
+addeval(Pack_U8_000)
 addeval(Pad_000)
 addeval(Pad_U8_000)
-#addeval(Pow_000)
-#addeval(PRelu_000)
+addeval(Pow_000)
+addeval(PRelu_000)
 #addeval(Range_000)
 #addeval(Rank_000)
 #addeval(ReduceAny_000)
@@ -116,20 +116,20 @@ addeval(Pad_U8_000)
 #addeval(ReduceProd_001)
 #addeval(ReduceProd_002)
 #addeval(ReduceProd_003)
-#addeval(ReLU_000)
-#addeval(ReLU6_000)
+addeval(ReLU_000)
+addeval(ReLU6_000)
 #addeval(ReLUN1To1_000)
 addeval(Reshape_000)
 addeval(Reshape_001)
 addeval(Reshape_002)
 #addeval(Reshape_003)
 addeval(Reshape_U8_000)
-#addeval(ResizeBilinear_000)
-#addeval(ResizeNearestNeighbor_000)
+addeval(ResizeBilinear_000)
+addeval(ResizeNearestNeighbor_000)
 #addeval(ReverseSequence_000)
 #addeval(ReverseV2_000)
 #addeval(Round_000)
-#addeval(Rsqrt_000)
+addeval(Rsqrt_000)
 #addeval(ScatterNd_000)
 #addeval(SegmentSum_000)
 #addeval(Select_000)
@@ -139,37 +139,39 @@ addeval(Reshape_U8_000)
 #addeval(SelectV2_001)
 #addeval(SelectV2_002)
 #addeval(Shape_000)
+addeval(SignatureDef_MultiOut_000)
+addeval(SignatureDef_MultiOut_001)
 #addeval(Sin_000)
 addeval(Slice_000)
 addeval(Softmax_000)
-#addeval(Softmax_U8_000)
-#addeval(SpaceToBatchND_000)
-#addeval(SpaceToBatchND_001)
-#addeval(SpaceToBatchND_002)
-#addeval(SpaceToBatchND_003)
+addeval(Softmax_U8_000)
+addeval(SpaceToBatchND_000)
+addeval(SpaceToBatchND_001)
+addeval(SpaceToBatchND_002)
+addeval(SpaceToBatchND_003)
 addeval(SpaceToDepth_000)
 #addeval(SparseToDense_000)
 addeval(Split_000)
-#addeval(SplitV_000)
-#addeval(Sqrt_000)
-#addeval(Square_000)
-#addeval(SquaredDifference_000)
+addeval(SplitV_000)
+addeval(Sqrt_000)
+addeval(Square_000)
+addeval(SquaredDifference_000)
 addeval(Squeeze_000)
 addeval(Squeeze_001)
 addeval(StridedSlice_000)
 addeval(StridedSlice_001)
 addeval(StridedSlice_002)
-#addeval(Sub_000)
-#addeval(Sub_U8_000)
+addeval(Sub_000)
+addeval(Sub_U8_000)
 #addeval(Sum_000)
 #addeval(Sum_001)
-#addeval(Tanh_000)
+addeval(Tanh_000)
 #addeval(Tile_000)
 #addeval(Tile_U8_000)
 #addeval(TopKV2_000)
 #addeval(TopKV2_001)
 addeval(Transpose_000)
-#addeval(TransposeConv_000)
+addeval(TransposeConv_000)
 addeval(Unpack_000)
 addeval(Unpack_001)
 addeval(Unpack_002)
@@ -180,9 +182,13 @@ addeval(Unpack_003)
 #addeval(While_001)
 #addeval(While_002)
 #addeval(While_003)
-#addeval(YUV_TO_RGB_U8_000)
+addeval(YUV_TO_RGB_U8_000)
 #addeval(ZerosLike_000)
 
 # Simple Network test
 addeval(Part_While_000)
 addeval(Part_While_001)
+
+# Tests with tolerance
+addevaltol(SVDF_000 8e-3 8e-3)
+addevaltol(SVDF_001 8e-3 8e-3)
index b92eefb40988f7606b15a6ea666a006e94ee8f5a..460dc7b23670493c96e12b2294313e1fdb1eafb2 100644 (file)
@@ -23,4 +23,8 @@ add_subdirectory(import)
 add_subdirectory(export)
 add_subdirectory(tester)
 
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 add_subdirectory(tests)
index a267d0e1f9008d89f2b4efd2ef504aa26c2516ae..f46181eb6e944560c259aeaeac0ed338ff49757a 100644 (file)
@@ -12,7 +12,7 @@ target_include_directories(luci_export PUBLIC include)
 target_link_libraries(luci_export PRIVATE luci_lang)
 target_link_libraries(luci_export PRIVATE luci_service)
 target_link_libraries(luci_export PRIVATE luci_pass)
-target_link_libraries(luci_export PRIVATE mio_circle)
+target_link_libraries(luci_export PRIVATE mio_circle04)
 target_link_libraries(luci_export PRIVATE luci_env)
 target_link_libraries(luci_export PRIVATE luci_log)
 target_link_libraries(luci_export PRIVATE luci_logex)
@@ -36,6 +36,6 @@ target_include_directories(luci_export_test PRIVATE src)
 target_link_libraries(luci_export_test luci_export)
 target_link_libraries(luci_export_test luci_plan)
 target_link_libraries(luci_export_test luci_lang)
-target_link_libraries(luci_export_test mio_circle)
+target_link_libraries(luci_export_test mio_circle04)
 target_link_libraries(luci_export_test luci_env)
 target_link_libraries(luci_export_test oops)
diff --git a/compiler/luci/export/src/CircleBuiltinTypesExtractor.h b/compiler/luci/export/src/CircleBuiltinTypesExtractor.h
new file mode 100644 (file)
index 0000000..0ff21a3
--- /dev/null
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
+#define __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
+
+#include "CircleExporterUtils.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <flatbuffers/flexbuffers.h>
+
+namespace luci
+{
+
+// NOTE Virtual nodes are not circle builtin operators.
+//      Therefore, they are not defined here.
+class BuiltinOptionsExtractor final
+  : public luci::CircleNodeMutableVisitor<flatbuffers::Offset<void>>
+{
+public:
+  BuiltinOptionsExtractor(flatbuffers::FlatBufferBuilder &builder) : _builder{builder}
+  {
+    // DO NOTHING
+  }
+
+public:
+  flatbuffers::Offset<void> visit(luci::CircleAbs *)
+  {
+    return circle::CreateAbsOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleAdd *node)
+  {
+    return circle::CreateAddOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleAddN *)
+  {
+    return circle::CreateAddNOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleArgMax *node)
+  {
+    return circle::CreateArgMaxOptions(_builder, luci::to_circle_tensortype(node->output_type()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleArgMin *node)
+  {
+    return circle::CreateArgMinOptions(_builder, luci::to_circle_tensortype(node->output_type()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleAveragePool2D *node)
+  {
+    return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+                                       node->stride()->h(), node->filter()->w(),
+                                       node->filter()->h(),
+                                       to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleBatchMatMul *node)
+  {
+    return circle::CreateBatchMatMulOptions(_builder, node->adj_x(), node->adj_y()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleBatchToSpaceND *)
+  {
+    return circle::CreateBatchToSpaceNDOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleBidirectionalSequenceLSTM *node)
+  {
+    return circle::CreateBidirectionalSequenceLSTMOptions(
+             _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+             node->proj_clip(), node->merge_outputs(), node->time_major(),
+             node->asymmetric_quantize_inputs())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleCast *node)
+  {
+    if (node->out_data_type() == loco::DataType::Unknown)
+      return _no_option;
+    else
+      return circle::CreateCastOptions(_builder, luci::to_circle_tensortype(node->in_data_type()),
+                                       luci::to_circle_tensortype(node->out_data_type()))
+        .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleCeil *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleConcatenation *node)
+  {
+    return circle::CreateConcatenationOptions(_builder, node->axis(),
+                                              to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  // CircleConst is not virtual but not builtinOperator
+  // flatbuffers::Offset<void> visit(luci::CircleConst *)
+  flatbuffers::Offset<void> visit(luci::CircleConv2D *node)
+  {
+    return circle::CreateConv2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+                                       node->stride()->h(),
+                                       to_circle_actfunc(node->fusedActivationFunction()),
+                                       node->dilation()->w(), node->dilation()->h())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleCos *)
+  {
+    return circle::CreateCosOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleCustom *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleDepthToSpace *node)
+  {
+    return circle::CreateDepthToSpaceOptions(_builder, node->block_size()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleDepthwiseConv2D *node)
+  {
+    return circle::CreateDepthwiseConv2DOptions(
+             _builder, getOpPadding(node->padding()), node->stride()->w(), node->stride()->h(),
+             node->depthMultiplier(), to_circle_actfunc(node->fusedActivationFunction()),
+             node->dilation()->w(), node->dilation()->h())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleDequantize *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleDiv *node)
+  {
+    return circle::CreateDivOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleElu *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleEqual *)
+  {
+    return circle::CreateEqualOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleExp *)
+  {
+    return circle::CreateExpOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleExpandDims *)
+  {
+    return circle::CreateExpandDimsOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleFakeQuant *node)
+  {
+    return circle::CreateFakeQuantOptions(_builder, node->min(), node->max(), node->num_bits(),
+                                          node->narrow_range())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleFill *)
+  {
+    return circle::CreateFillOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleFloor *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleFloorDiv *)
+  {
+    return circle::CreateFloorDivOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleFloorMod *)
+  {
+    return circle::CreateFloorModOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleFullyConnected *node)
+  {
+    return circle::CreateFullyConnectedOptions(
+             _builder, to_circle_actfunc(node->fusedActivationFunction()),
+             to_circle_weightsformat(node->weights_format()), node->keep_num_dims())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleGather *node)
+  {
+    return circle::CreateGatherOptions(_builder, node->axis()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleGatherNd *)
+  {
+    return circle::CreateGatherNdOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleGreater *)
+  {
+    return circle::CreateGreaterOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleGreaterEqual *)
+  {
+    return circle::CreateGreaterEqualOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleIf *node)
+  {
+    return circle::CreateIfOptions(_builder, node->then_branch(), node->else_branch()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleL2Normalize *node)
+  {
+    return circle::CreateL2NormOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleL2Pool2D *node)
+  {
+    return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+                                       node->stride()->h(), node->filter()->w(),
+                                       node->filter()->h(),
+                                       to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLeakyRelu *node)
+  {
+    return circle::CreateLeakyReluOptions(_builder, node->alpha()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLess *)
+  {
+    return circle::CreateLessOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLessEqual *)
+  {
+    return circle::CreateLessEqualOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLocalResponseNormalization *node)
+  {
+    return circle::CreateLocalResponseNormalizationOptions(_builder, node->radius(), node->bias(),
+                                                           node->alpha(), node->beta())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLog *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleLogicalAnd *)
+  {
+    return circle::CreateLogicalAndOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLogicalNot *)
+  {
+    return circle::CreateLogicalNotOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLogicalOr *)
+  {
+    return circle::CreateLogicalOrOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleLogistic *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleLogSoftmax *)
+  {
+    return circle::CreateLogSoftmaxOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMatrixDiag *)
+  {
+    return circle::CreateMatrixDiagOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMatrixSetDiag *)
+  {
+    return circle::CreateMatrixSetDiagOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMaximum *)
+  {
+    return circle::CreateMaximumMinimumOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMaxPool2D *node)
+  {
+    return circle::CreatePool2DOptions(_builder, getOpPadding(node->padding()), node->stride()->w(),
+                                       node->stride()->h(), node->filter()->w(),
+                                       node->filter()->h(),
+                                       to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMean *node)
+  {
+    return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMinimum *)
+  {
+    return circle::CreateMaximumMinimumOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMirrorPad *node)
+  {
+    return circle::CreateMirrorPadOptions(_builder, to_circle_mirrorpadmode(node->mode())).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleMul *node)
+  {
+    return circle::CreateMulOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleNeg *)
+  {
+    return circle::CreateNegOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV4 *)
+  {
+    return circle::CreateNonMaxSuppressionV4Options(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleNonMaxSuppressionV5 *)
+  {
+    return circle::CreateNonMaxSuppressionV5Options(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleNotEqual *)
+  {
+    return circle::CreateNotEqualOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleOneHot *node)
+  {
+    return circle::CreateOneHotOptions(_builder, node->axis()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CirclePack *node)
+  {
+    return circle::CreatePackOptions(_builder, node->values_count(), node->axis()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CirclePad *)
+  {
+    return circle::CreatePadOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CirclePadV2 *)
+  {
+    return circle::CreatePadV2Options(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CirclePow *)
+  {
+    return circle::CreatePowOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CirclePRelu *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleQuantize *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleRange *)
+  {
+    return circle::CreateRangeOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleRank *)
+  {
+    return circle::CreateRankOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleReduceAny *node)
+  {
+    return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleReduceMax *node)
+  {
+    return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleReduceMin *node)
+  {
+    return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleReduceProd *node)
+  {
+    return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleRelu *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleRelu6 *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleReluN1To1 *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleReshape *node)
+  {
+    auto new_shape = _builder.CreateVector<int32_t>(
+      node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
+    return circle::CreateReshapeOptions(_builder, new_shape).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleResizeBilinear *node)
+  {
+    return circle::CreateResizeBilinearOptions(_builder, node->align_corners(),
+                                               node->half_pixel_centers())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleResizeNearestNeighbor *node)
+  {
+    return circle::CreateResizeNearestNeighborOptions(_builder, node->align_corners()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleReverseSequence *node)
+  {
+    return circle::CreateReverseSequenceOptions(_builder, node->seq_axis(), node->batch_axis())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleReverseV2 *)
+  {
+    return circle::CreateReverseV2Options(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleRound *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleRsqrt *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleScatterNd *)
+  {
+    return circle::CreateScatterNdOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSegmentSum *)
+  {
+    return circle::CreateSegmentSumOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSelect *)
+  {
+    return circle::CreateSelectOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSelectV2 *)
+  {
+    return circle::CreateSelectV2Options(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleShape *node)
+  {
+    return circle::CreateShapeOptions(_builder, luci::to_circle_tensortype(node->out_type()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSin *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleSlice *)
+  {
+    return circle::CreateSliceOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSoftmax *node)
+  {
+    return circle::CreateSoftmaxOptions(_builder, node->beta()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSpaceToBatchND *)
+  {
+    return circle::CreateSpaceToBatchNDOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSpaceToDepth *node)
+  {
+    return circle::CreateSpaceToDepthOptions(_builder, node->block_size()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSparseToDense *node)
+  {
+    return circle::CreateSparseToDenseOptions(_builder, node->validate_indices()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSplit *node)
+  {
+    return circle::CreateSplitOptions(_builder, node->num_split()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSplitV *node)
+  {
+    return circle::CreateSplitVOptions(_builder, node->num_split()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSqrt *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleSquare *)
+  {
+    return circle::CreateSquareOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSquaredDifference *)
+  {
+    return circle::CreateSquaredDifferenceOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSqueeze *node)
+  {
+    auto squeeze_dims = _builder.CreateVector<int32_t>(node->squeeze_dims());
+    return circle::CreateSqueezeOptions(_builder, squeeze_dims).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleStridedSlice *node)
+  {
+    return circle::CreateStridedSliceOptions(_builder, node->begin_mask(), node->end_mask(),
+                                             node->ellipsis_mask(), node->new_axis_mask(),
+                                             node->shrink_axis_mask())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSub *node)
+  {
+    return circle::CreateSubOptions(_builder, to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSum *node)
+  {
+    return circle::CreateReducerOptions(_builder, node->keep_dims()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleSVDF *node)
+  {
+    return circle::CreateSVDFOptions(_builder, node->svdf_rank(),
+                                     to_circle_actfunc(node->fusedActivationFunction()),
+                                     node->asymmetric_quantize_inputs())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleTanh *) { return _no_option; }
+  flatbuffers::Offset<void> visit(luci::CircleTile *)
+  {
+    return circle::CreateTileOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleTopKV2 *)
+  {
+    return circle::CreateTopKV2Options(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleTranspose *)
+  {
+    return circle::CreateTransposeOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleTransposeConv *node)
+  {
+    return circle::CreateTransposeConvOptions(_builder, getOpPadding(node->padding()),
+                                              node->stride()->w(), node->stride()->h())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleUnidirectionalSequenceLSTM *node)
+  {
+    return circle::CreateUnidirectionalSequenceLSTMOptions(
+             _builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
+             node->proj_clip(), node->time_major(), node->asymmetric_quantize_inputs())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleUnique *node)
+  {
+    return circle::CreateUniqueOptions(_builder, luci::to_circle_tensortype(node->idx_out_type()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleUnpack *node)
+  {
+    return circle::CreateUnpackOptions(_builder, node->num(), node->axis()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleWhere *)
+  {
+    return circle::CreateWhereOptions(_builder).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleWhile *node)
+  {
+    return circle::CreateWhileOptions(_builder, node->cond_branch(), node->body_branch()).Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleZerosLike *)
+  {
+    return circle::CreateZerosLikeOptions(_builder).Union();
+  }
+  // Circle only
+  flatbuffers::Offset<void> visit(luci::CircleBCQFullyConnected *node)
+  {
+    return circle::CreateBCQFullyConnectedOptions(
+             _builder, node->weights_hidden_size(),
+             to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleBCQGather *node)
+  {
+    return circle::CreateBCQGatherOptions(_builder, node->input_hidden_size(), node->axis())
+      .Union();
+  }
+  flatbuffers::Offset<void> visit(luci::CircleInstanceNorm *node)
+  {
+    return circle::CreateInstanceNormOptions(_builder, node->epsilon(),
+                                             to_circle_actfunc(node->fusedActivationFunction()))
+      .Union();
+  }
+
+protected:
+  flatbuffers::FlatBufferBuilder &_builder;
+
+private:
+  const flatbuffers::Offset<void> _no_option = 0;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_BUILTIN_TYPES_EXTRACTOR_H__
diff --git a/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h b/compiler/luci/export/src/CircleBuiltinTypesMappingRule.h
new file mode 100644 (file)
index 0000000..6f7c0f7
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
+#define __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+class BuiltinOperatorMappingRule final : public CircleNodeVisitor<circle::BuiltinOperator>
+{
+public:
+  BuiltinOperatorMappingRule()
+  {
+    // DO NOTHING
+  }
+
+public:
+  static BuiltinOperatorMappingRule &get()
+  {
+    static BuiltinOperatorMappingRule instance;
+    return instance;
+  }
+
+public:
+#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \
+  circle::BuiltinOperator visit(const CIRCLE_NODE *) final { return circle::OP; }
+// Virtual nodes are not circle builtin operator
+#define CIRCLE_VNODE(CIRCLE_NODE)
+#include "CircleOps.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+};
+
+class BuiltinOptionsMappingRule final : public CircleNodeVisitor<circle::BuiltinOptions>
+{
+public:
+  BuiltinOptionsMappingRule()
+  {
+    // DO NOTHING
+  }
+
+public:
+  static BuiltinOptionsMappingRule &get()
+  {
+    static BuiltinOptionsMappingRule instance;
+    return instance;
+  }
+
+public:
+#define CIRCLE_NODE(CIRCLE_NODE, OP, OPTION) \
+  circle::BuiltinOptions visit(const CIRCLE_NODE *) final { return circle::OPTION; }
+// Virtual nodes are not circle builtin operator
+#define CIRCLE_VNODE(CIRCLE_NODE)
+#include "CircleOps.lst"
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_EXPORT_BUILTIN_TYPES_MAPPING_RULE_H__
index 5868c176c6b8448b4953430f739d29952627591a..083add9be02f293afe7e271e5d4239e7b14f1d09 100644 (file)
@@ -79,14 +79,19 @@ encodeOperatorCodes(FlatBufferBuilder &builder, std::unordered_map<luci::OpCode,
   for (auto it : opcodes)
   {
     uint32_t idx = it.second;
+    int8_t dep_code = 127; // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES
+    if (it.first.opcode < BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+      dep_code = static_cast<int8_t>(it.first.opcode);
     if (it.first.opcode != BuiltinOperator_CUSTOM)
     {
-      operator_codes_vec[idx] = CreateOperatorCode(builder, it.first.opcode, 0, it.first.version);
+      operator_codes_vec[idx] =
+        CreateOperatorCode(builder, dep_code, 0, it.first.version, it.first.opcode);
     }
     else
     {
       operator_codes_vec[idx] =
-        CreateOperatorCode(builder, it.first.opcode, builder.CreateString(it.first.custom_code));
+        CreateOperatorCode(builder, dep_code, builder.CreateString(it.first.custom_code),
+                           it.first.version, it.first.opcode);
     }
   }
 
index 3a7ba304f8ad039fcfb348d09fdc2d0d6e49f74b..9473c2c4ea3e81eb7573c2656df1ddb0b2801f7f 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "CircleExporterUtils.h"
+#include "CircleBuiltinTypesMappingRule.h"
 
 #include <oops/InternalExn.h>
 
@@ -163,36 +164,63 @@ circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVe
   }
 }
 
-} // namespace luci
+circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node)
+{
+  return node->accept(&BuiltinOperatorMappingRule::get());
+}
 
-namespace luci
+circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node)
 {
+  if (auto cast = dynamic_cast<const luci::CircleCast *>(node))
+  {
+    return (cast->out_data_type() == loco::DataType::Unknown) ? circle::BuiltinOptions_NONE
+                                                              : circle::BuiltinOptions_CastOptions;
+  }
 
-uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
-                                                    const int32_t op_version)
+  return node->accept(&BuiltinOptionsMappingRule::get());
+}
+
+std::string circle_custom_code(const luci::CircleNode *node)
 {
-  assert(op_version > 0);
+  if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node))
+  {
+    return custom_node->custom_code();
+  }
 
-  auto it = _operator_codes.find(OpCode{builtin_code, "", op_version});
-  if (it != _operator_codes.end())
+  return "";
+}
+
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node)
+{
+  if (auto custom_node = dynamic_cast<const luci::CircleCustom *>(node))
   {
-    return it->second;
+    std::vector<uint8_t> custom_options_vec{custom_node->custom_options().begin(),
+                                            custom_node->custom_options().end()};
+    return fb.CreateVector(custom_options_vec);
   }
-  auto idx = static_cast<uint32_t>(_operator_codes.size());
-  _operator_codes.emplace(OpCode{builtin_code, "", op_version}, idx);
-  return idx;
+
+  return 0;
 }
 
-uint32_t SerializedModelData::registerCustomOpcode(const std::string &custom_code)
+} // namespace luci
+
+namespace luci
 {
-  const circle::BuiltinOperator builtin_code = circle::BuiltinOperator_CUSTOM;
-  auto it = _operator_codes.find(OpCode{builtin_code, custom_code});
+
+uint32_t SerializedModelData::registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
+                                                    const std::string &custom_code,
+                                                    const int32_t op_version)
+{
+  assert(op_version > 0);
+
+  auto it = _operator_codes.find(OpCode{builtin_code, custom_code, op_version});
   if (it != _operator_codes.end())
   {
     return it->second;
   }
   auto idx = static_cast<uint32_t>(_operator_codes.size());
-  _operator_codes.emplace(OpCode{builtin_code, custom_code}, idx);
+  _operator_codes.emplace(OpCode{builtin_code, custom_code, op_version}, idx);
   return idx;
 }
 
index 95310b3530165f82db00095758e83e54589dd8fe..4a4c54a695a6e34c78e68a0f3baead05b787ad19 100644 (file)
@@ -39,6 +39,12 @@ flatbuffers::Offset<void> to_circle_sparse_index_vector(flatbuffers::FlatBufferB
                                                         const SparseIndexVector &sparse_idx_vec);
 circle::SparseIndexVector to_circle_sparse_index_vector_type(luci::SparseIndexVectorType type);
 
+circle::BuiltinOperator circle_builtin_operator(const luci::CircleNode *node);
+circle::BuiltinOptions circle_builtin_options(const luci::CircleNode *node);
+std::string circle_custom_code(const luci::CircleNode *node);
+flatbuffers::Offset<flatbuffers::Vector<uint8_t>>
+circle_custom_options(flatbuffers::FlatBufferBuilder &fb, const luci::CircleNode *node);
+
 } // namespace luci
 
 namespace luci
index be64a52d4b5fa6f4ef06d9551075f113479d33dd..b300a7fcffd528d748a4d4f0f77f441c3ec7e1cc 100644 (file)
  */
 
 #include "CircleOperationExporter.h"
-#include "CircleExporterUtils.h"
-#include "Check.h"
+#include "CircleOperationExporterRule.h"
 
 #include <luci/IR/CircleNode.h>
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
 #include <luci/Profile/CircleNodeOrigin.h>
 #include <luci/Plan/CircleNodeExecutionPlan.h>
-#include <luci/UserSettings.h>
-#include <luci/Log.h>
+#include <loco/IR/Algorithm.h>
 
-#include <loco/IR/CanonicalNodeVisitor.h>
-#include <oops/InternalExn.h>
-
-#include <flatbuffers/flexbuffers.h>
-
-using namespace flatbuffers;
-using namespace circle;
-
-namespace
-{
-
-using namespace luci;
-
-struct ExportContext
-{
-  FlatBufferBuilder &builder;
-  SerializedModelData &md;
-  SerializedGraphData &gd;
-};
-
-/**
- * @brief Exports CircleMaxPool2D or CircleAveragePool2D
- *
- * @note  CirclePool2D should be one of CircleMaxPool2D or CircleAveragePool2D
- */
-template <class CirclePool2D>
-void export_pool_2d(ExportContext &ctx, CirclePool2D *node, circle::BuiltinOperator builtin_op)
-{
-  LUCI_ASSERT(builtin_op == circle::BuiltinOperator_MAX_POOL_2D ||
-                builtin_op == circle::BuiltinOperator_L2_POOL_2D ||
-                builtin_op == circle::BuiltinOperator_AVERAGE_POOL_2D,
-              "Should be L2Pool, MaxPool or AvgPool");
-  LUCI_ASSERT(node->padding() != luci::Padding::UNDEFINED, "Padding is not set");
-
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(builtin_op, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-
-  circle::Padding padding = getOpPadding(node->padding());
-
-  auto options = CreatePool2DOptions(ctx.builder, padding, node->stride()->w(), node->stride()->h(),
-                                     node->filter()->w(), node->filter()->h(),
-                                     to_circle_actfunc(node->fusedActivationFunction()));
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_Pool2DOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-/**
- * @brief export simple nodes
- */
-void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop,
-                 circle::BuiltinOptions bot, flatbuffers::Offset<void> options_offset)
-{
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(node)};
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->arg(i)));
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, bot, options_offset);
-  ctx.gd._operators.push_back(op_offset);
-}
-
-/**
- * @brief export simple nodes having void options
- */
-void export_node(ExportContext &ctx, loco::Node *node, circle::BuiltinOperator bop)
-{
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(bop, loco::must_cast<luci::CircleNode *>(node)->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->arg(i)));
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleAddN *node)
-{
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_ADD_N, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->inputs(i)));
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateAddNOptions(ctx.builder);
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_AddNOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleCast *node)
-{
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CAST, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->x())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-
-  flatbuffers::Offset<Operator> op_offset;
-  if (node->out_data_type() != loco::DataType::Unknown)
-  {
-    auto options = CreateCastOptions(ctx.builder, to_circle_tensortype(node->in_data_type()),
-                                     to_circle_tensortype(node->out_data_type()));
-    op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                               circle::BuiltinOptions_CastOptions, options.Union());
-  }
-  else
-  {
-    op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs);
-  }
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleConcatenation *node)
-{
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_CONCATENATION, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-    inputs_vec.push_back(get_tensor_index(node->values(i)));
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateConcatenationOptions(ctx.builder, node->axis(),
-                                            to_circle_actfunc(node->fusedActivationFunction()));
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_ConcatenationOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleCustom *node)
-{
-  auto custom_outputs = loco::succs(node);
-  assert(custom_outputs.size() == node->numOutputs());
-
-  uint32_t op_idx = ctx.md.registerCustomOpcode(node->custom_code());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t index = 0; index < node->numInputs(); index++)
-  {
-    inputs_vec.push_back(get_tensor_index(node->inputs(index)));
-  }
-  for (uint32_t index = 0; index < custom_outputs.size(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : custom_outputs)
-    {
-      auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
-      if (custom_out->index() == static_cast<int32_t>(index))
-      {
-        outputs_vec.push_back(get_tensor_index(custom_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Custom output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> circle_custom_options;
-  std::vector<uint8_t> custom_options_vec{node->custom_options().begin(),
-                                          node->custom_options().end()};
-  circle_custom_options = ctx.builder.CreateVector(custom_options_vec);
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs, circle::BuiltinOptions_NONE,
-                                  flatbuffers::Offset<void>(), circle_custom_options);
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleIf *node)
-{
-  auto if_outs = loco::succs(node);
-  assert(if_outs.size() == node->output_count());
-
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_IF, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  inputs_vec.push_back(get_tensor_index(node->cond()));
-  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
-    inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
-  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : if_outs)
-    {
-      auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
-      if (if_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(if_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid CircleIf output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateIfOptions(ctx.builder, node->then_branch(), node->else_branch());
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_IfOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV4 *node)
-{
-  auto nms_outs = loco::succs(node);
-  assert(nms_outs.size() == 2);
-
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V4,
-                                                 node->op_version());
-  std::vector<int32_t> inputs_vec{
-    get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
-    get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
-    get_tensor_index(node->score_threshold()),
-  };
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : nms_outs)
-    {
-      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
-      if (nms_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(nms_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateNonMaxSuppressionV4Options(ctx.builder);
-  auto op_offset =
-    CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                   circle::BuiltinOptions_NonMaxSuppressionV4Options, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleNonMaxSuppressionV5 *node)
-{
-  auto nms_outs = loco::succs(node);
-  assert(nms_outs.size() == 3);
-
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_NON_MAX_SUPPRESSION_V5,
-                                                 node->op_version());
-  std::vector<int32_t> inputs_vec{
-    get_tensor_index(node->boxes()),           get_tensor_index(node->scores()),
-    get_tensor_index(node->max_output_size()), get_tensor_index(node->iou_threshold()),
-    get_tensor_index(node->score_threshold()), get_tensor_index(node->soft_nms_sigma()),
-  };
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < nms_outs.size(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : nms_outs)
-    {
-      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
-      if (nms_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(nms_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateNonMaxSuppressionV5Options(ctx.builder);
-  auto op_offset =
-    CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                   circle::BuiltinOptions_NonMaxSuppressionV5Options, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleReverseV2 *node)
-{
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_REVERSE_V2, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->tensor()), get_tensor_index(node->axis())};
-  std::vector<int32_t> outputs_vec{get_tensor_index(static_cast<loco::Node *>(node))};
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateReverseV2Options(ctx.builder);
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_ReverseSequenceOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleSplit *node)
-{
-  auto split_outs = loco::succs(node);
-  assert(int32_t(split_outs.size()) == node->num_split());
-
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT, node->op_version());
-  // NOTE BuiltinOperator_SPLIT input is placed at second position
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->split_dim()),
-                                  get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num_split(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : split_outs)
-    {
-      auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
-      if (split_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(split_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Split output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateSplitOptions(ctx.builder, node->num_split());
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_SplitOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleSplitV *node)
-{
-  auto split_outs = loco::succs(node);
-  assert(int32_t(split_outs.size()) == node->num_split());
-
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_SPLIT_V, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()),
-                                  get_tensor_index(node->size_splits()),
-                                  get_tensor_index(node->split_dim())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num_split(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : split_outs)
-    {
-      auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
-      if (split_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(split_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid SplitV output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateSplitVOptions(ctx.builder, node->num_split());
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_SplitVOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleTopKV2 *node)
-{
-  auto topkv2_outs = loco::succs(node);
-  int outs_count = int32_t(topkv2_outs.size());
-  assert(outs_count == 2);
-
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_TOPK_V2, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input()), get_tensor_index(node->k())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < outs_count; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : topkv2_outs)
-    {
-      auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
-      if (topkv2_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(topkv2_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid TopKV2 output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateTopKV2Options(ctx.builder);
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_TopKV2Options, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleUnique *node)
-{
-  auto unique_outs = loco::succs(node);
-  assert(int32_t(unique_outs.size()) == 2);
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNIQUE, node->op_version());
-
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < 2; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : unique_outs)
-    {
-      auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
-      if (unique_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(unique_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid Unique output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateUniqueOptions(ctx.builder, to_circle_tensortype(node->idx_out_type()));
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_UniqueOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleUnpack *node)
-{
-  LOGGER(l);
-  auto settings = luci::UserSettings::settings();
-
-  auto unpack_outs = loco::succs(node);
-  // NOTE real models may not use all of the outputs
-  if (static_cast<int32_t>(unpack_outs.size()) != node->num())
-  {
-    if (settings->get(luci::UserSettings::Key::DisableValidation))
-    {
-      WARN(l) << "Warning: export Unpack(" << node->name() << ") 'num' not same as outputs";
-    }
-    else
-      assert(false);
-  }
-
-  uint32_t op_idx =
-    ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_UNPACK, node->op_version());
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->value())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < node->num(); index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : unpack_outs)
-    {
-      auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
-      if (unpack_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(unpack_out));
-        found = true;
-        break;
-      }
-    }
-    // NOTE real models may not use all of the outputs
-    if (!found)
-    {
-      if (settings->get(luci::UserSettings::Key::DisableValidation))
-      {
-        WARN(l) << "Warning: export Unpack(" << node->name() << ") output " << index << " not used";
-      }
-      else
-        assert(false);
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateUnpackOptions(ctx.builder, node->num(), node->axis());
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_UnpackOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-void export_node(ExportContext &ctx, luci::CircleWhile *node)
-{
-  auto while_outs = loco::succs(node);
-  assert(while_outs.size() == node->output_count());
-
-  uint32_t op_idx = ctx.md.registerBuiltinOpcode(circle::BuiltinOperator_WHILE, node->op_version());
-  std::vector<int32_t> inputs_vec;
-  std::vector<int32_t> outputs_vec;
-
-  for (uint32_t idx = 0; idx < node->input_count(); ++idx)
-    inputs_vec.push_back(get_tensor_index(node->input(idx)));
-
-  for (uint32_t idx = 0; idx < node->output_count(); ++idx)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : while_outs)
-    {
-      auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
-      if (while_out->index() == static_cast<int32_t>(idx))
-      {
-        outputs_vec.push_back(get_tensor_index(while_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid CircleWhile output");
-    }
-  }
-
-  auto inputs = ctx.builder.CreateVector(inputs_vec);
-  auto outputs = ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateWhileOptions(ctx.builder, node->cond_branch(), node->body_branch());
-  auto op_offset = CreateOperator(ctx.builder, op_idx, inputs, outputs,
-                                  circle::BuiltinOptions_WhileOptions, options.Union());
-  ctx.gd._operators.push_back(op_offset);
-}
-
-class ExportHelper
-{
-public:
-  ExportHelper(ExportContext &ctx) : _ctx{ctx}
-  {
-    // DO NOTHING
-  }
-
-protected:
-  /**
-   * @brief export simple nodes
-   */
-  void export_simple(loco::Node *node, circle::BuiltinOperator bop, circle::BuiltinOptions bot,
-                     flatbuffers::Offset<void> options_offset)
-  {
-    export_node(_ctx, node, bop, bot, options_offset);
-  }
-
-  /**
-   * @brief export simple nodes having void options
-   */
-  void export_simple(loco::Node *node, circle::BuiltinOperator bop)
-  {
-    export_node(_ctx, node, bop);
-  }
-
-protected:
-  ExportContext &_ctx;
-};
-
-enum class OE
-{
-  ABC,
-  DEF,
-  GHIJ,
-  KLMN,
-  OPQR,
-  STUV,
-  WXYZ,
-  CIRC, // circle only
-  VIRT, // virtual
-};
-
-class OperationExporter final : public ExportHelper
-{
-public:
-  OperationExporter(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void export_node(luci::CircleNode *);
-};
-
-template <OE oe> class OpExporterLet;
-
-template <>
-class OpExporterLet<OE::ABC> final : public luci::CircleNodeMutableVisitor<void>,
-                                     public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  // NOTE visit for luci::CircleNode is added NOT to throw NYI
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleAbs *) final;
-  void visit(luci::CircleAdd *) final;
-  void visit(luci::CircleAddN *) final;
-  void visit(luci::CircleArgMax *) final;
-  void visit(luci::CircleArgMin *) final;
-  void visit(luci::CircleAveragePool2D *) final;
-  void visit(luci::CircleBatchMatMul *) final;
-  void visit(luci::CircleBatchToSpaceND *) final;
-  void visit(luci::CircleBidirectionalSequenceLSTM *) final;
-  void visit(luci::CircleCast *) final;
-  void visit(luci::CircleCeil *) final;
-  void visit(luci::CircleConcatenation *) final;
-  void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
-  void visit(luci::CircleConv2D *) final;
-  void visit(luci::CircleCos *) final;
-  void visit(luci::CircleCustom *) final;
-};
-
-template <>
-class OpExporterLet<OE::DEF> final : public luci::CircleNodeMutableVisitor<void>,
-                                     public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleDepthToSpace *) final;
-  void visit(luci::CircleDepthwiseConv2D *) final;
-  void visit(luci::CircleDequantize *) final;
-  void visit(luci::CircleDiv *) final;
-  void visit(luci::CircleElu *) final;
-  void visit(luci::CircleEqual *) final;
-  void visit(luci::CircleExp *) final;
-  void visit(luci::CircleExpandDims *) final;
-  void visit(luci::CircleFakeQuant *) final;
-  void visit(luci::CircleFill *) final;
-  void visit(luci::CircleFloor *) final;
-  void visit(luci::CircleFloorDiv *) final;
-  void visit(luci::CircleFloorMod *) final;
-  void visit(luci::CircleFullyConnected *) final;
-};
-
-template <>
-class OpExporterLet<OE::GHIJ> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleGather *) final;
-  void visit(luci::CircleGatherNd *) final;
-  void visit(luci::CircleGreater *) final;
-  void visit(luci::CircleGreaterEqual *) final;
-  void visit(luci::CircleIf *) final;
-};
-
-template <>
-class OpExporterLet<OE::KLMN> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleL2Normalize *) final;
-  void visit(luci::CircleL2Pool2D *) final;
-  void visit(luci::CircleLeakyRelu *) final;
-  void visit(luci::CircleLess *) final;
-  void visit(luci::CircleLessEqual *) final;
-  void visit(luci::CircleLocalResponseNormalization *) final;
-  void visit(luci::CircleLog *) final;
-  void visit(luci::CircleLogicalAnd *) final;
-  void visit(luci::CircleLogicalNot *) final;
-  void visit(luci::CircleLogicalOr *) final;
-  void visit(luci::CircleLogistic *) final;
-  void visit(luci::CircleLogSoftmax *) final;
-  void visit(luci::CircleMatrixDiag *) final;
-  void visit(luci::CircleMatrixSetDiag *) final;
-  void visit(luci::CircleMaximum *) final;
-  void visit(luci::CircleMaxPool2D *) final;
-  void visit(luci::CircleMean *) final;
-  void visit(luci::CircleMinimum *) final;
-  void visit(luci::CircleMirrorPad *) final;
-  void visit(luci::CircleMul *) final;
-  void visit(luci::CircleNeg *) final;
-  void visit(luci::CircleNonMaxSuppressionV4 *) final;
-  void visit(luci::CircleNonMaxSuppressionV5 *) final;
-  void visit(luci::CircleNotEqual *) final;
-};
-
-template <>
-class OpExporterLet<OE::OPQR> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleOneHot *) final;
-  void visit(luci::CirclePack *) final;
-  void visit(luci::CirclePad *) final;
-  void visit(luci::CirclePadV2 *) final;
-  void visit(luci::CirclePow *) final;
-  void visit(luci::CirclePRelu *) final;
-  void visit(luci::CircleQuantize *) final;
-  void visit(luci::CircleRange *) final;
-  void visit(luci::CircleRank *) final;
-  void visit(luci::CircleReduceAny *) final;
-  void visit(luci::CircleReduceMax *) final;
-  void visit(luci::CircleReduceMin *) final;
-  void visit(luci::CircleReduceProd *) final;
-  void visit(luci::CircleRelu *) final;
-  void visit(luci::CircleRelu6 *) final;
-  void visit(luci::CircleReluN1To1 *) final;
-  void visit(luci::CircleReshape *) final;
-  void visit(luci::CircleResizeBilinear *) final;
-  void visit(luci::CircleResizeNearestNeighbor *) final;
-  void visit(luci::CircleReverseSequence *) final;
-  void visit(luci::CircleReverseV2 *) final;
-  void visit(luci::CircleRound *) final;
-  void visit(luci::CircleRsqrt *) final;
-};
-
-template <>
-class OpExporterLet<OE::STUV> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleScatterNd *) final;
-  void visit(luci::CircleSegmentSum *) final;
-  void visit(luci::CircleSelect *) final;
-  void visit(luci::CircleSelectV2 *) final;
-  void visit(luci::CircleShape *) final;
-  void visit(luci::CircleSin *) final;
-  void visit(luci::CircleSlice *) final;
-  void visit(luci::CircleSoftmax *) final;
-  void visit(luci::CircleSpaceToBatchND *) final;
-  void visit(luci::CircleSpaceToDepth *) final;
-  void visit(luci::CircleSparseToDense *) final;
-  void visit(luci::CircleSplit *) final;
-  void visit(luci::CircleSplitV *) final;
-  void visit(luci::CircleSqrt *) final;
-  void visit(luci::CircleSquare *) final;
-  void visit(luci::CircleSquaredDifference *) final;
-  void visit(luci::CircleSqueeze *) final;
-  void visit(luci::CircleStridedSlice *) final;
-  void visit(luci::CircleSub *) final;
-  void visit(luci::CircleSum *) final;
-  void visit(luci::CircleTanh *) final;
-  void visit(luci::CircleTile *) final;
-  void visit(luci::CircleTopKV2 *) final;
-  void visit(luci::CircleTranspose *) final;
-  void visit(luci::CircleTransposeConv *) final;
-  void visit(luci::CircleUnidirectionalSequenceLSTM *) final;
-  void visit(luci::CircleUnique *) final;
-  void visit(luci::CircleUnpack *) final;
-};
-
-template <>
-class OpExporterLet<OE::WXYZ> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  void visit(luci::CircleWhere *) final;
-  void visit(luci::CircleWhile *) final;
-  void visit(luci::CircleZerosLike *) final;
-};
-
-template <>
-class OpExporterLet<OE::CIRC> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  // Circle only
-  void visit(luci::CircleBCQFullyConnected *) final;
-  void visit(luci::CircleBCQGather *) final;
-  void visit(luci::CircleInstanceNorm *) final;
-};
-
-template <>
-class OpExporterLet<OE::VIRT> final : public luci::CircleNodeMutableVisitor<void>,
-                                      public ExportHelper
-{
-public:
-  OpExporterLet(ExportContext &ctx) : ExportHelper(ctx)
-  {
-    // DO NOTHING
-  }
-
-public:
-  void visit(luci::CircleNode *) final {}
-
-public:
-  // Virtual
-  void visit(luci::CircleInput *) final {}
-  void visit(luci::CircleOutput *) final {}
-  void visit(luci::CircleOutputDummy *) final {}
-  void visit(luci::CircleOutputExclude *) final {}
-  // Virtual for multiple-outputs
-  void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
-  void visit(luci::CircleCustomOut *) final {}
-  void visit(luci::CircleIfOut *) final {}
-  void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
-  void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
-  void visit(luci::CircleSplitOut *) final {}
-  void visit(luci::CircleSplitVOut *) final {}
-  void visit(luci::CircleTopKV2Out *) final {}
-  void visit(luci::CircleUniqueOut *) final {}
-  void visit(luci::CircleUnpackOut *) final {}
-  void visit(luci::CircleWhileOut *) final {}
-};
-
-void OperationExporter::export_node(luci::CircleNode *node)
-{
-  // TODO revise return type to bool and return if handled
-#define VISIT_OE(GRP)                \
-  do                                 \
-  {                                  \
-    OpExporterLet<OE::GRP> oe(_ctx); \
-    node->accept(&oe);               \
-  } while (false)
-
-  VISIT_OE(ABC);
-  VISIT_OE(DEF);
-  VISIT_OE(GHIJ);
-  VISIT_OE(KLMN);
-  VISIT_OE(OPQR);
-  VISIT_OE(STUV);
-  VISIT_OE(WXYZ);
-  VISIT_OE(CIRC);
-  VISIT_OE(VIRT);
-
-#undef VISIT_OE
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAbs *node)
-{
-  export_simple(node, circle::BuiltinOperator_ABS, circle::BuiltinOptions_AbsOptions,
-                CreateAbsOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAdd *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_ADD, circle::BuiltinOptions_AddOptions,
-    CreateAddOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAddN *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleArgMax *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_ARG_MAX, circle::BuiltinOptions_ArgMaxOptions,
-    CreateArgMaxOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleArgMin *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_ARG_MIN, circle::BuiltinOptions_ArgMinOptions,
-    CreateArgMinOptions(_ctx.builder, to_circle_tensortype(node->output_type())).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleAveragePool2D *node)
-{
-  export_pool_2d<luci::CircleAveragePool2D>(_ctx, node, circle::BuiltinOperator_AVERAGE_POOL_2D);
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleBatchMatMul *node)
-{
-  export_simple(node, circle::BuiltinOperator_BATCH_MATMUL,
-                circle::BuiltinOptions_BatchMatMulOptions,
-                CreateBatchMatMulOptions(_ctx.builder, node->adj_x(), node->adj_y()).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleBidirectionalSequenceLSTM *node)
-{
-  auto bidi_lstm_outs = loco::succs(node);
-  assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
-  uint32_t op_idx = _ctx.md.registerBuiltinOpcode(
-    circle::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, node->op_version());
-
-  std::vector<int32_t> inputs_vec{get_tensor_index(node->input())};
-  std::vector<int32_t> outputs_vec;
-
-  for (int32_t index = 0; index < 2; index++)
-  {
-    // store in order of index
-    bool found = false;
-    for (auto out : bidi_lstm_outs)
-    {
-      auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
-      if (bidi_lstm_out->index() == index)
-      {
-        outputs_vec.push_back(get_tensor_index(bidi_lstm_out));
-        found = true;
-        break;
-      }
-    }
-    if (!found)
-    {
-      INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
-    }
-  }
-
-  auto inputs = _ctx.builder.CreateVector(inputs_vec);
-  auto outputs = _ctx.builder.CreateVector(outputs_vec);
-  auto options = CreateBidirectionalSequenceLSTMOptions(
-    _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()), node->cell_clip(),
-    node->proj_clip(), node->merge_outputs(), node->time_major(),
-    node->asymmetric_quantize_inputs());
-  auto op_offset =
-    CreateOperator(_ctx.builder, op_idx, inputs, outputs,
-                   circle::BuiltinOptions_BidirectionalSequenceLSTMOptions, options.Union());
-  _ctx.gd._operators.push_back(op_offset);
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCast *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCeil *node)
-{
-  export_simple(node, circle::BuiltinOperator_CEIL);
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleConcatenation *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleBatchToSpaceND *node)
-{
-  export_simple(node, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
-                circle::BuiltinOptions_BatchToSpaceNDOptions,
-                CreateBatchToSpaceNDOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleConv2D *node)
-{
-  export_simple(node, circle::BuiltinOperator_CONV_2D, circle::BuiltinOptions_Conv2DOptions,
-                CreateConv2DOptions(_ctx.builder, getOpPadding(node->padding()),
-                                    node->stride()->w(), node->stride()->h(),
-                                    to_circle_actfunc(node->fusedActivationFunction()),
-                                    node->dilation()->w(), node->dilation()->h())
-                  .Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCos *node)
-{
-  export_simple(node, circle::BuiltinOperator_COS, circle::BuiltinOptions_CosOptions,
-                CreateCosOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::ABC>::visit(luci::CircleCustom *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDepthToSpace *node)
-{
-  export_simple(node, circle::BuiltinOperator_DEPTH_TO_SPACE,
-                circle::BuiltinOptions_DepthToSpaceOptions,
-                CreateDepthToSpaceOptions(_ctx.builder, node->block_size()).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDepthwiseConv2D *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_DEPTHWISE_CONV_2D, circle::BuiltinOptions_DepthwiseConv2DOptions,
-    CreateDepthwiseConv2DOptions(_ctx.builder, getOpPadding(node->padding()), node->stride()->w(),
-                                 node->stride()->h(), node->depthMultiplier(),
-                                 to_circle_actfunc(node->fusedActivationFunction()),
-                                 node->dilation()->w(), node->dilation()->h())
-      .Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDequantize *node)
-{
-  export_simple(node, circle::BuiltinOperator_DEQUANTIZE);
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleDiv *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_DIV, circle::BuiltinOptions_DivOptions,
-    CreateDivOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleElu *node)
-{
-  export_simple(node, circle::BuiltinOperator_ELU);
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleEqual *node)
-{
-  export_simple(node, circle::BuiltinOperator_EQUAL, circle::BuiltinOptions_EqualOptions,
-                CreateEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleExp *node)
-{
-  export_simple(node, circle::BuiltinOperator_EXP, circle::BuiltinOptions_ExpOptions,
-                CreateExpOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleExpandDims *node)
-{
-  export_simple(node, circle::BuiltinOperator_EXPAND_DIMS, circle::BuiltinOptions_ExpandDimsOptions,
-                CreateExpandDimsOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFakeQuant *node)
-{
-  export_simple(node, circle::BuiltinOperator_FAKE_QUANT, circle::BuiltinOptions_FakeQuantOptions,
-                CreateFakeQuantOptions(_ctx.builder, node->min(), node->max(), node->num_bits(),
-                                       node->narrow_range())
-                  .Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFill *node)
-{
-  export_simple(node, circle::BuiltinOperator_FILL, circle::BuiltinOptions_FillOptions,
-                CreateFillOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFloor *node)
-{
-  export_simple(node, circle::BuiltinOperator_FLOOR);
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFloorDiv *node)
-{
-  export_simple(node, circle::BuiltinOperator_FLOOR_DIV, circle::BuiltinOptions_FloorDivOptions,
-                CreateFloorDivOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFloorMod *node)
-{
-  export_simple(node, circle::BuiltinOperator_FLOOR_MOD, circle::BuiltinOptions_FloorModOptions,
-                CreateFloorModOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::DEF>::visit(luci::CircleFullyConnected *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_FULLY_CONNECTED, circle::BuiltinOptions_FullyConnectedOptions,
-    CreateFullyConnectedOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
-                                to_circle_weightsformat(node->weights_format()))
-      .Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGather *node)
-{
-  export_simple(node, circle::BuiltinOperator_GATHER, circle::BuiltinOptions_GatherOptions,
-                CreateGatherOptions(_ctx.builder, node->axis()).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGatherNd *node)
-{
-  export_simple(node, circle::BuiltinOperator_GATHER_ND, circle::BuiltinOptions_GatherNdOptions,
-                CreateGatherNdOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGreater *node)
-{
-  export_simple(node, circle::BuiltinOperator_GREATER, circle::BuiltinOptions_GreaterOptions,
-                CreateGreaterOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleGreaterEqual *node)
-{
-  export_simple(node, circle::BuiltinOperator_GREATER_EQUAL,
-                circle::BuiltinOptions_GreaterEqualOptions,
-                CreateGreaterEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::GHIJ>::visit(luci::CircleIf *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleL2Normalize *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_L2_NORMALIZATION, circle::BuiltinOptions_L2NormOptions,
-    CreateL2NormOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleL2Pool2D *node)
-{
-  export_pool_2d<luci::CircleL2Pool2D>(_ctx, node, circle::BuiltinOperator_L2_POOL_2D);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLeakyRelu *node)
-{
-  export_simple(node, circle::BuiltinOperator_LEAKY_RELU, circle::BuiltinOptions_LeakyReluOptions,
-                CreateLeakyReluOptions(_ctx.builder, node->alpha()).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLess *node)
-{
-  export_simple(node, circle::BuiltinOperator_LESS, circle::BuiltinOptions_LessOptions,
-                CreateLessOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLessEqual *node)
-{
-  export_simple(node, circle::BuiltinOperator_LESS_EQUAL, circle::BuiltinOptions_LessEqualOptions,
-                CreateLessEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLocalResponseNormalization *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
-                circle::BuiltinOptions_LocalResponseNormalizationOptions,
-                CreateLocalResponseNormalizationOptions(_ctx.builder, node->radius(), node->bias(),
-                                                        node->alpha(), node->beta())
-                  .Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLog *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOG);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalAnd *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOGICAL_AND, circle::BuiltinOptions_LogicalAndOptions,
-                CreateLogicalAndOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalNot *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOGICAL_NOT, circle::BuiltinOptions_LogicalNotOptions,
-                CreateLogicalNotOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogicalOr *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOGICAL_OR, circle::BuiltinOptions_LogicalOrOptions,
-                CreateLogicalOrOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogistic *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOGISTIC);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleLogSoftmax *node)
-{
-  export_simple(node, circle::BuiltinOperator_LOG_SOFTMAX, circle::BuiltinOptions_LogSoftmaxOptions,
-                CreateLogSoftmaxOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMatrixDiag *node)
-{
-  export_simple(node, circle::BuiltinOperator_MATRIX_DIAG, circle::BuiltinOptions_MatrixDiagOptions,
-                CreateMatrixDiagOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMatrixSetDiag *node)
-{
-  export_simple(node, circle::BuiltinOperator_MATRIX_SET_DIAG,
-                circle::BuiltinOptions_MatrixSetDiagOptions,
-                CreateMatrixSetDiagOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMaximum *node)
-{
-  export_simple(node, circle::BuiltinOperator_MAXIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
-                CreateMaximumMinimumOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMaxPool2D *node)
-{
-  export_pool_2d<luci::CircleMaxPool2D>(_ctx, node, circle::BuiltinOperator_MAX_POOL_2D);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMean *node)
-{
-  export_simple(node, circle::BuiltinOperator_MEAN, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMinimum *node)
-{
-  export_simple(node, circle::BuiltinOperator_MINIMUM, circle::BuiltinOptions_MaximumMinimumOptions,
-                CreateMaximumMinimumOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMirrorPad *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_MIRROR_PAD, circle::BuiltinOptions_MirrorPadOptions,
-    CreateMirrorPadOptions(_ctx.builder, to_circle_mirrorpadmode(node->mode())).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleMul *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_MUL, circle::BuiltinOptions_MulOptions,
-    CreateMulOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNeg *node)
-{
-  export_simple(node, circle::BuiltinOperator_NEG, circle::BuiltinOptions_NegOptions,
-                CreateNegOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNonMaxSuppressionV4 *node)
-{
-  export_node(_ctx, node);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNonMaxSuppressionV5 *node)
-{
-  export_node(_ctx, node);
-}
-
-void OpExporterLet<OE::KLMN>::visit(luci::CircleNotEqual *node)
-{
-  export_simple(node, circle::BuiltinOperator_NOT_EQUAL, circle::BuiltinOptions_NotEqualOptions,
-                CreateNotEqualOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleOneHot *node)
-{
-  export_simple(node, circle::BuiltinOperator_ONE_HOT, circle::BuiltinOptions_OneHotOptions,
-                CreateOneHotOptions(_ctx.builder, node->axis()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePack *node)
-{
-  export_simple(node, circle::BuiltinOperator_PACK, circle::BuiltinOptions_PackOptions,
-                CreatePackOptions(_ctx.builder, node->values_count(), node->axis()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePad *node)
-{
-  export_simple(node, circle::BuiltinOperator_PAD, circle::BuiltinOptions_PadOptions,
-                CreatePadOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePadV2 *node)
-{
-  export_simple(node, circle::BuiltinOperator_PADV2, circle::BuiltinOptions_PadV2Options,
-                CreatePadV2Options(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePow *node)
-{
-  export_simple(node, circle::BuiltinOperator_POW, circle::BuiltinOptions_PowOptions,
-                CreatePowOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CirclePRelu *node)
-{
-  export_simple(node, circle::BuiltinOperator_PRELU);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleQuantize *node)
-{
-  export_simple(node, circle::BuiltinOperator_QUANTIZE);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRange *node)
-{
-  export_simple(node, circle::BuiltinOperator_RANGE, circle::BuiltinOptions_RangeOptions,
-                CreateRangeOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRank *node)
-{
-  export_simple(node, circle::BuiltinOperator_RANK, circle::BuiltinOptions_RankOptions,
-                CreateRankOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceAny *node)
-{
-  export_simple(node, circle::BuiltinOperator_REDUCE_ANY, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceMax *node)
-{
-  export_simple(node, circle::BuiltinOperator_REDUCE_MAX, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceMin *node)
-{
-  export_simple(node, circle::BuiltinOperator_REDUCE_MIN, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReduceProd *node)
-{
-  export_simple(node, circle::BuiltinOperator_REDUCE_PROD, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRelu *node)
-{
-  export_simple(node, circle::BuiltinOperator_RELU);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRelu6 *node)
-{
-  export_simple(node, circle::BuiltinOperator_RELU6);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReluN1To1 *node)
-{
-  export_simple(node, circle::BuiltinOperator_RELU_N1_TO_1);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReshape *node)
-{
-  auto new_shape = _ctx.builder.CreateVector<int32_t>(
-    node->newShape()->rank(), [node](size_t i) { return node->newShape()->dim(i); });
-
-  export_simple(node, circle::BuiltinOperator_RESHAPE, circle::BuiltinOptions_ReshapeOptions,
-                CreateReshapeOptions(_ctx.builder, new_shape).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleResizeBilinear *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_RESIZE_BILINEAR, circle::BuiltinOptions_ResizeBilinearOptions,
-    CreateResizeBilinearOptions(_ctx.builder, node->align_corners(), node->half_pixel_centers())
-      .Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleResizeNearestNeighbor *node)
-{
-  export_simple(node, circle::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
-                circle::BuiltinOptions_ResizeNearestNeighborOptions,
-                CreateResizeNearestNeighborOptions(_ctx.builder, node->align_corners()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReverseSequence *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_REVERSE_SEQUENCE, circle::BuiltinOptions_ReverseSequenceOptions,
-    CreateReverseSequenceOptions(_ctx.builder, node->seq_axis(), node->batch_axis()).Union());
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleReverseV2 *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRound *node)
-{
-  export_simple(node, circle::BuiltinOperator_ROUND);
-}
-
-void OpExporterLet<OE::OPQR>::visit(luci::CircleRsqrt *node)
-{
-  export_simple(node, circle::BuiltinOperator_RSQRT);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleScatterNd *node)
-{
-  export_simple(node, circle::BuiltinOperator_SCATTER_ND, circle::BuiltinOptions_ScatterNdOptions,
-                CreateScatterNdOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSegmentSum *node)
-{
-  export_simple(node, circle::BuiltinOperator_SEGMENT_SUM, circle::BuiltinOptions_SegmentSumOptions,
-                CreateSegmentSumOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSelect *node)
-{
-  export_simple(node, circle::BuiltinOperator_SELECT, circle::BuiltinOptions_SelectOptions,
-                CreateSelectOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSelectV2 *node)
-{
-  export_simple(node, circle::BuiltinOperator_SELECT_V2, circle::BuiltinOptions_SelectV2Options,
-                CreateSelectV2Options(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleShape *node)
-{
-  export_simple(node, circle::BuiltinOperator_SHAPE, circle::BuiltinOptions_ShapeOptions,
-                CreateShapeOptions(_ctx.builder, to_circle_tensortype(node->out_type())).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSin *node)
-{
-  export_simple(node, circle::BuiltinOperator_SIN);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSlice *node)
-{
-  export_simple(node, circle::BuiltinOperator_SLICE, circle::BuiltinOptions_SliceOptions,
-                CreateSliceOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSoftmax *node)
-{
-  export_simple(node, circle::BuiltinOperator_SOFTMAX, circle::BuiltinOptions_SoftmaxOptions,
-                CreateSoftmaxOptions(_ctx.builder, node->beta()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSpaceToBatchND *node)
-{
-  export_simple(node, circle::BuiltinOperator_SPACE_TO_BATCH_ND,
-                circle::BuiltinOptions_SpaceToBatchNDOptions,
-                CreateSpaceToBatchNDOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSpaceToDepth *node)
-{
-  export_simple(node, circle::BuiltinOperator_SPACE_TO_DEPTH,
-                circle::BuiltinOptions_SpaceToDepthOptions,
-                CreateSpaceToDepthOptions(_ctx.builder, node->block_size()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSparseToDense *node)
-{
-  export_simple(node, circle::BuiltinOperator_SPARSE_TO_DENSE,
-                circle::BuiltinOptions_SparseToDenseOptions,
-                CreateSparseToDenseOptions(_ctx.builder, node->validate_indices()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSplit *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSplitV *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSqrt *node)
-{
-  export_simple(node, circle::BuiltinOperator_SQRT);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSquare *node)
-{
-  export_simple(node, circle::BuiltinOperator_SQUARE, circle::BuiltinOptions_SquareOptions,
-                CreateSquareOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSquaredDifference *node)
-{
-  export_simple(node, circle::BuiltinOperator_SQUARED_DIFFERENCE,
-                circle::BuiltinOptions_SquaredDifferenceOptions,
-                CreateSquaredDifferenceOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSqueeze *node)
-{
-  auto squeeze_dims = _ctx.builder.CreateVector<int32_t>(node->squeeze_dims());
-  export_simple(node, circle::BuiltinOperator_SQUEEZE, circle::BuiltinOptions_SqueezeOptions,
-                CreateSqueezeOptions(_ctx.builder, squeeze_dims).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleStridedSlice *node)
-{
-  export_simple(node, circle::BuiltinOperator_STRIDED_SLICE,
-                circle::BuiltinOptions_StridedSliceOptions,
-                CreateStridedSliceOptions(_ctx.builder, node->begin_mask(), node->end_mask(),
-                                          node->ellipsis_mask(), node->new_axis_mask(),
-                                          node->shrink_axis_mask())
-                  .Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSub *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_SUB, circle::BuiltinOptions_SubOptions,
-    CreateSubOptions(_ctx.builder, to_circle_actfunc(node->fusedActivationFunction())).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleSum *node)
-{
-  export_simple(node, circle::BuiltinOperator_SUM, circle::BuiltinOptions_ReducerOptions,
-                CreateReducerOptions(_ctx.builder, node->keep_dims()).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTanh *node)
-{
-  export_simple(node, circle::BuiltinOperator_TANH);
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTile *node)
-{
-  export_simple(node, circle::BuiltinOperator_TILE, circle::BuiltinOptions_TileOptions,
-                CreateTileOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTopKV2 *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTranspose *node)
-{
-  export_simple(node, circle::BuiltinOperator_TRANSPOSE, circle::BuiltinOptions_TransposeOptions,
-                CreateTransposeOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleTransposeConv *node)
-{
-  export_simple(node, circle::BuiltinOperator_TRANSPOSE_CONV,
-                circle::BuiltinOptions_TransposeConvOptions,
-                CreateTransposeConvOptions(_ctx.builder, getOpPadding(node->padding()),
-                                           node->stride()->w(), node->stride()->h())
-                  .Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleUnidirectionalSequenceLSTM *node)
-{
-  export_simple(node, circle::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
-                circle::BuiltinOptions_UnidirectionalSequenceLSTMOptions,
-                CreateUnidirectionalSequenceLSTMOptions(
-                  _ctx.builder, to_circle_actfunc(node->fusedActivationFunction()),
-                  node->cell_clip(), node->proj_clip(), node->time_major(),
-                  node->asymmetric_quantize_inputs())
-                  .Union());
-}
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleUnique *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::STUV>::visit(luci::CircleUnpack *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::WXYZ>::visit(luci::CircleWhere *node)
-{
-  export_simple(node, circle::BuiltinOperator_WHERE, circle::BuiltinOptions_WhereOptions,
-                CreateWhereOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::WXYZ>::visit(luci::CircleWhile *node) { export_node(_ctx, node); }
-
-void OpExporterLet<OE::WXYZ>::visit(luci::CircleZerosLike *node)
-{
-  export_simple(node, circle::BuiltinOperator_ZEROS_LIKE, circle::BuiltinOptions_ZerosLikeOptions,
-                CreateZerosLikeOptions(_ctx.builder).Union());
-}
-
-void OpExporterLet<OE::CIRC>::visit(luci::CircleBCQFullyConnected *node)
-{
-  export_simple(node, circle::BuiltinOperator_BCQ_FULLY_CONNECTED,
-                circle::BuiltinOptions_BCQFullyConnectedOptions,
-                CreateBCQFullyConnectedOptions(_ctx.builder, node->weights_hidden_size(),
-                                               to_circle_actfunc(node->fusedActivationFunction()))
-                  .Union());
-}
-
-void OpExporterLet<OE::CIRC>::visit(luci::CircleBCQGather *node)
-{
-  export_simple(
-    node, circle::BuiltinOperator_BCQ_GATHER, circle::BuiltinOptions_BCQGatherOptions,
-    CreateBCQGatherOptions(_ctx.builder, node->input_hidden_size(), node->axis()).Union());
-}
-
-void OpExporterLet<OE::CIRC>::visit(luci::CircleInstanceNorm *node)
+namespace luci
 {
-  export_simple(node, circle::BuiltinOperator_INSTANCE_NORM,
-                circle::BuiltinOptions_InstanceNormOptions,
-                CreateInstanceNormOptions(_ctx.builder, node->epsilon(),
-                                          to_circle_actfunc(node->fusedActivationFunction()))
-                  .Union());
-}
 
-void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
-                SerializedGraphData &gd, uint32_t node_position)
+void exportNodes(loco::Graph *g, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
+                 SerializedGraphData &gd)
 {
-  if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+  uint32_t node_position = 0;
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
   {
     ExportContext ctx{builder, md, gd};
-    OperationExporter exporter{ctx};
+    OperationExporterRule exporter_rule{ctx};
+
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    circle_node->accept(&exporter_rule);
 
     const auto ops_size = gd._operators.size();
 
-    exporter.export_node(circle_node);
     if (has_origin(circle_node) && ops_size != gd._operators.size())
     {
       const auto node_id = gd._operators.size() - 1;
@@ -1716,25 +60,7 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
       }
       md._metadata.add_execution_plan_table(node_position, execution_plan_vector);
     }
-  }
-  else
-  {
-    INTERNAL_EXN("Node with unsupported dialect found");
-  }
-}
-
-} // namespace
 
-namespace luci
-{
-
-void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
-                 SerializedGraphData &gd)
-{
-  uint32_t node_position = 0;
-  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
-  {
-    exportNode(node, builder, md, gd, node_position);
     node_position++;
   }
 }
index de6abfc5492aedbe87ff2714f27943386d27faac..f2b3cfd6b04ac07642d5f1f258e639f7f496f86b 100644 (file)
@@ -17,7 +17,7 @@
 #ifndef __CIRCLE_OPERATION_EXPORTER_H__
 #define __CIRCLE_OPERATION_EXPORTER_H__
 
-#include "CircleExporterUtils.h"
+#include "SerializedData.h"
 
 #include <loco/IR/Graph.h>
 
diff --git a/compiler/luci/export/src/CircleOperationExporterRule.cpp b/compiler/luci/export/src/CircleOperationExporterRule.cpp
new file mode 100644 (file)
index 0000000..8dc59fa
--- /dev/null
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleOperationExporterRule.h"
+#include "CircleBuiltinTypesExtractor.h"
+#include "Check.h"
+
+#include <loco/IR/Graph.h>
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <oops/InternalExn.h>
+
+#include <vector>
+
+namespace
+{
+class OutputVectorExtractor final : public luci::CircleNodeMutableVisitor<std::vector<int32_t>>
+{
+public:
+  OutputVectorExtractor()
+  {
+    // DO NOTHING
+  }
+
+public:
+  std::vector<int32_t> visit(luci::CircleNode *node) final
+  {
+    std::vector<int32_t> outputs_vec{luci::get_tensor_index(node)};
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleBidirectionalSequenceLSTM *node) final
+  {
+    auto bidi_lstm_outs = loco::succs(node);
+    assert((bidi_lstm_outs.size() == 1) || (bidi_lstm_outs.size() == 2));
+
+    std::vector<int32_t> outputs_vec(bidi_lstm_outs.size());
+
+    for (auto out : bidi_lstm_outs)
+    {
+      auto bidi_lstm_out = loco::must_cast<luci::CircleBidirectionalSequenceLSTMOut *>(out);
+      if (bidi_lstm_out->index() >= int32_t(bidi_lstm_outs.size()))
+        INTERNAL_EXN("Invalid BidirectionalSequenceLSTM output");
+      outputs_vec[bidi_lstm_out->index()] = luci::get_tensor_index(bidi_lstm_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleCustom *node) final
+  {
+    auto custom_outputs = loco::succs(node);
+    assert(custom_outputs.size() == node->numOutputs());
+
+    std::vector<int32_t> outputs_vec(node->numOutputs());
+
+    for (auto out : custom_outputs)
+    {
+      auto custom_out = loco::must_cast<luci::CircleCustomOut *>(out);
+      if (custom_out->index() >= int32_t(node->numOutputs()))
+        INTERNAL_EXN("Invalid Custom output");
+      outputs_vec[custom_out->index()] = luci::get_tensor_index(custom_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleIf *node) final
+  {
+    auto if_outs = loco::succs(node);
+    assert(if_outs.size() == node->output_count());
+
+    std::vector<int32_t> outputs_vec(node->output_count());
+
+    for (auto out : if_outs)
+    {
+      auto if_out = loco::must_cast<luci::CircleIfOut *>(out);
+      if (if_out->index() >= int32_t(node->output_count()))
+        INTERNAL_EXN("Invalid If output");
+      outputs_vec[if_out->index()] = luci::get_tensor_index(if_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV4 *node) final
+  {
+    auto nms_outs = loco::succs(node);
+    assert(nms_outs.size() == 2);
+
+    std::vector<int32_t> outputs_vec(2);
+
+    for (auto out : nms_outs)
+    {
+      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV4Out *>(out);
+      if (nms_out->index() >= 2)
+        INTERNAL_EXN("Invalid NonMaxSuppressionV4 output");
+      outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleNonMaxSuppressionV5 *node) final
+  {
+    auto nms_outs = loco::succs(node);
+    assert(nms_outs.size() == 3);
+
+    std::vector<int32_t> outputs_vec(3);
+
+    for (auto out : nms_outs)
+    {
+      auto nms_out = loco::must_cast<luci::CircleNonMaxSuppressionV5Out *>(out);
+      if (nms_out->index() >= 3)
+        INTERNAL_EXN("Invalid NonMaxSuppressionV5 output");
+      outputs_vec[nms_out->index()] = luci::get_tensor_index(nms_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleSplit *node) final
+  {
+    auto split_outs = loco::succs(node);
+    assert(int32_t(split_outs.size()) == node->num_split());
+
+    std::vector<int32_t> outputs_vec(node->num_split());
+
+    for (auto out : split_outs)
+    {
+      auto split_out = loco::must_cast<luci::CircleSplitOut *>(out);
+      if (split_out->index() >= node->num_split())
+        INTERNAL_EXN("Invalid Split output");
+      outputs_vec[split_out->index()] = luci::get_tensor_index(split_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleSplitV *node) final
+  {
+    auto split_outs = loco::succs(node);
+    assert(int32_t(split_outs.size()) == node->num_split());
+
+    std::vector<int32_t> outputs_vec(node->num_split());
+
+    for (auto out : split_outs)
+    {
+      auto split_out = loco::must_cast<luci::CircleSplitVOut *>(out);
+      if (split_out->index() >= node->num_split())
+        INTERNAL_EXN("Invalid SplitV output");
+      outputs_vec[split_out->index()] = luci::get_tensor_index(split_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleTopKV2 *node) final
+  {
+    auto topkv2_outs = loco::succs(node);
+    assert(topkv2_outs.size() == 2);
+
+    std::vector<int32_t> outputs_vec(2);
+
+    for (auto out : topkv2_outs)
+    {
+      auto topkv2_out = loco::must_cast<luci::CircleTopKV2Out *>(out);
+      if (topkv2_out->index() >= 2)
+        INTERNAL_EXN("Invalid TopKV2 output");
+      outputs_vec[topkv2_out->index()] = luci::get_tensor_index(topkv2_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleUnique *node) final
+  {
+    auto unique_outs = loco::succs(node);
+    assert(unique_outs.size() == 2);
+
+    std::vector<int32_t> outputs_vec(2);
+
+    for (auto out : unique_outs)
+    {
+      auto unique_out = loco::must_cast<luci::CircleUniqueOut *>(out);
+      if (unique_out->index() >= 2)
+        INTERNAL_EXN("Invalid Unique output");
+      outputs_vec[unique_out->index()] = luci::get_tensor_index(unique_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleUnpack *node) final
+  {
+    auto unpack_outs = loco::succs(node);
+    assert(int32_t(unpack_outs.size()) == node->num());
+
+    std::vector<int32_t> outputs_vec(node->num());
+
+    for (auto out : unpack_outs)
+    {
+      auto unpack_out = loco::must_cast<luci::CircleUnpackOut *>(out);
+      if (unpack_out->index() >= node->num())
+        INTERNAL_EXN("Invalid Unpack output");
+      outputs_vec[unpack_out->index()] = luci::get_tensor_index(unpack_out);
+    }
+
+    return outputs_vec;
+  }
+
+  std::vector<int32_t> visit(luci::CircleWhile *node) final
+  {
+    auto while_outs = loco::succs(node);
+    assert(while_outs.size() == node->output_count());
+
+    std::vector<int32_t> outputs_vec(node->output_count());
+
+    for (auto out : while_outs)
+    {
+      auto while_out = loco::must_cast<luci::CircleWhileOut *>(out);
+      if (while_out->index() >= int32_t(node->output_count()))
+        INTERNAL_EXN("Invalid While output");
+      outputs_vec[while_out->index()] = luci::get_tensor_index(while_out);
+    }
+
+    return outputs_vec;
+  }
+};
+
+} // namespace
+
+namespace luci
+{
+
+void OperationExporterRule::visit(luci::CircleNode *node)
+{
+  auto op_idx = _ctx.md.registerBuiltinOpcode(circle_builtin_operator(node),
+                                              circle_custom_code(node), node->op_version());
+
+  std::vector<int32_t> inputs_vec;
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    inputs_vec.push_back(luci::get_tensor_index(node->arg(i)));
+  auto inputs = _ctx.builder.CreateVector(inputs_vec);
+
+  OutputVectorExtractor outputs_vec_extractor;
+  auto outputs_vec = node->accept(&outputs_vec_extractor);
+  auto outputs = _ctx.builder.CreateVector(outputs_vec);
+
+  auto builtin_options = circle_builtin_options(node);
+
+  luci::BuiltinOptionsExtractor builtin_options_extractor(_ctx.builder);
+  auto options_offset = node->accept(&builtin_options_extractor);
+
+  // If node is not CircleCustom, null offset(0) is returned
+  auto custom_options = circle_custom_options(_ctx.builder, node);
+
+  auto op_offset = circle::CreateOperator(_ctx.builder, op_idx, inputs, outputs, builtin_options,
+                                          options_offset, custom_options);
+  _ctx.gd._operators.push_back(op_offset);
+}
+
+} // namespace luci
diff --git a/compiler/luci/export/src/CircleOperationExporterRule.h b/compiler/luci/export/src/CircleOperationExporterRule.h
new file mode 100644 (file)
index 0000000..23e7546
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPERATION_EXPORTER_RULE_H__
+#define __CIRCLE_OPERATION_EXPORTER_RULE_H__
+
+#include "CircleOperationExporter.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+struct ExportContext
+{
+  flatbuffers::FlatBufferBuilder &builder;
+  luci::SerializedModelData &md;
+  luci::SerializedGraphData &gd;
+};
+
+class OperationExporterRule final : public luci::CircleNodeMutableVisitor<void>
+{
+public:
+  OperationExporterRule(ExportContext &ctx) : _ctx{ctx}
+  {
+    // DO NOTHING
+  }
+
+public:
+  // Default export rule
+  void visit(luci::CircleNode *node) final;
+
+  // Non-virtual
+  void visit(luci::CircleConst *) final{/* skip, everything is done in exportOpDefinedTensors */};
+
+  // Virtual
+  void visit(luci::CircleInput *) final {}
+  void visit(luci::CircleOutput *) final {}
+  void visit(luci::CircleOutputDummy *) final {}
+  void visit(luci::CircleOutputExclude *) final {}
+  // Virtual for multiple-outputs
+  void visit(luci::CircleBidirectionalSequenceLSTMOut *) final {}
+  void visit(luci::CircleCustomOut *) final {}
+  void visit(luci::CircleIfOut *) final {}
+  void visit(luci::CircleNonMaxSuppressionV4Out *) final {}
+  void visit(luci::CircleNonMaxSuppressionV5Out *) final {}
+  void visit(luci::CircleSplitOut *) final {}
+  void visit(luci::CircleSplitVOut *) final {}
+  void visit(luci::CircleTopKV2Out *) final {}
+  void visit(luci::CircleUniqueOut *) final {}
+  void visit(luci::CircleUnpackOut *) final {}
+  void visit(luci::CircleVariable *) final {}
+  void visit(luci::CircleWhileOut *) final {}
+
+protected:
+  ExportContext &_ctx;
+};
+
+} // namespace luci
+
+#endif // __CIRCLE_OPERATION_EXPORTER_RULE_H__
diff --git a/compiler/luci/export/src/CircleOps.lst b/compiler/luci/export/src/CircleOps.lst
new file mode 100644 (file)
index 0000000..1b69093
--- /dev/null
@@ -0,0 +1,154 @@
+#ifndef CIRCLE_NODE
+#error "Define CIRCLE_NODE"
+#endif // CIRCLE_NODE
+
+#ifndef CIRCLE_VNODE
+#error "Define CIRCLE_VNODE"
+#endif // CIRCLE_VNODE
+
+//
+// PLEASE SORT NODE DECLS IN ALPHABETICAL ORDER
+//
+// NOTE : CIRCLE_VNODE does not have any additional parameters
+//        because they are not circle builtin operators
+//        Please add parameters when they are needed.
+//
+// CIRCLE_NODE(CircleNode, circle::BuiltinOperator, circle::BuiltinOptions)
+// CIRCLE_VNODE(CircleNode)
+//
+
+CIRCLE_NODE(CircleAbs, BuiltinOperator_ABS, BuiltinOptions_AbsOptions)
+CIRCLE_NODE(CircleAdd, BuiltinOperator_ADD, BuiltinOptions_AddOptions)
+CIRCLE_NODE(CircleAddN, BuiltinOperator_ADD_N, BuiltinOptions_AddNOptions)
+CIRCLE_NODE(CircleArgMax, BuiltinOperator_ARG_MAX, BuiltinOptions_ArgMaxOptions)
+CIRCLE_NODE(CircleArgMin, BuiltinOperator_ARG_MIN, BuiltinOptions_ArgMinOptions)
+CIRCLE_NODE(CircleAveragePool2D, BuiltinOperator_AVERAGE_POOL_2D , BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleBatchToSpaceND, BuiltinOperator_BATCH_TO_SPACE_ND, BuiltinOptions_BatchToSpaceNDOptions)
+CIRCLE_NODE(CircleBatchMatMul, BuiltinOperator_BATCH_MATMUL, BuiltinOptions_BatchMatMulOptions)
+CIRCLE_NODE(CircleBidirectionalSequenceLSTM, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_BidirectionalSequenceLSTMOptions)
+CIRCLE_NODE(CircleCast, BuiltinOperator_CAST, BuiltinOptions_CastOptions)
+CIRCLE_NODE(CircleCeil, BuiltinOperator_CEIL, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleConcatenation, BuiltinOperator_CONCATENATION, BuiltinOptions_ConcatenationOptions)
+CIRCLE_NODE(CircleConv2D, BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions)
+CIRCLE_NODE(CircleCos, BuiltinOperator_COS, BuiltinOptions_CosOptions)
+CIRCLE_NODE(CircleCustom, BuiltinOperator_CUSTOM, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleDepthToSpace, BuiltinOperator_DEPTH_TO_SPACE, BuiltinOptions_DepthToSpaceOptions)
+CIRCLE_NODE(CircleDepthwiseConv2D, BuiltinOperator_DEPTHWISE_CONV_2D, BuiltinOptions_DepthwiseConv2DOptions)
+CIRCLE_NODE(CircleDequantize, BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions)
+CIRCLE_NODE(CircleDiv, BuiltinOperator_DIV, BuiltinOptions_DivOptions)
+CIRCLE_NODE(CircleElu, BuiltinOperator_ELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleEqual, BuiltinOperator_EQUAL, BuiltinOptions_EqualOptions)
+CIRCLE_NODE(CircleExp, BuiltinOperator_EXP, BuiltinOptions_ExpOptions)
+CIRCLE_NODE(CircleExpandDims, BuiltinOperator_EXPAND_DIMS, BuiltinOptions_ExpandDimsOptions)
+CIRCLE_NODE(CircleFakeQuant, BuiltinOperator_FAKE_QUANT, BuiltinOptions_FakeQuantOptions)
+CIRCLE_NODE(CircleFill, BuiltinOperator_FILL, BuiltinOptions_FillOptions)
+CIRCLE_NODE(CircleFloor, BuiltinOperator_FLOOR, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleFloorDiv, BuiltinOperator_FLOOR_DIV, BuiltinOptions_FloorDivOptions)
+CIRCLE_NODE(CircleFloorMod, BuiltinOperator_FLOOR_MOD, BuiltinOptions_FloorModOptions)
+CIRCLE_NODE(CircleFullyConnected, BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions)
+CIRCLE_NODE(CircleGather, BuiltinOperator_GATHER, BuiltinOptions_GatherOptions)
+CIRCLE_NODE(CircleGatherNd, BuiltinOperator_GATHER_ND, BuiltinOptions_GatherNdOptions)
+CIRCLE_NODE(CircleGreater, BuiltinOperator_GREATER, BuiltinOptions_GreaterOptions)
+CIRCLE_NODE(CircleGreaterEqual, BuiltinOperator_GREATER_EQUAL, BuiltinOptions_GreaterEqualOptions)
+CIRCLE_NODE(CircleIf, BuiltinOperator_IF, BuiltinOptions_IfOptions)
+CIRCLE_NODE(CircleL2Normalize, BuiltinOperator_L2_NORMALIZATION, BuiltinOptions_L2NormOptions)
+CIRCLE_NODE(CircleL2Pool2D, BuiltinOperator_L2_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleLeakyRelu, BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions)
+CIRCLE_NODE(CircleLess, BuiltinOperator_LESS, BuiltinOptions_LessOptions)
+CIRCLE_NODE(CircleLessEqual, BuiltinOperator_LESS_EQUAL, BuiltinOptions_LessEqualOptions)
+CIRCLE_NODE(CircleLocalResponseNormalization, BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, BuiltinOptions_LocalResponseNormalizationOptions)
+CIRCLE_NODE(CircleLog, BuiltinOperator_LOG, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleLogicalAnd, BuiltinOperator_LOGICAL_AND, BuiltinOptions_LogicalAndOptions)
+CIRCLE_NODE(CircleLogicalNot, BuiltinOperator_LOGICAL_NOT, BuiltinOptions_LogicalNotOptions)
+CIRCLE_NODE(CircleLogicalOr, BuiltinOperator_LOGICAL_OR, BuiltinOptions_LogicalOrOptions)
+CIRCLE_NODE(CircleLogistic, BuiltinOperator_LOGISTIC, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleLogSoftmax, BuiltinOperator_LOG_SOFTMAX, BuiltinOptions_LogSoftmaxOptions)
+CIRCLE_NODE(CircleMatrixDiag, BuiltinOperator_MATRIX_DIAG, BuiltinOptions_MatrixDiagOptions)
+CIRCLE_NODE(CircleMaxPool2D, BuiltinOperator_MAX_POOL_2D, BuiltinOptions_Pool2DOptions)
+CIRCLE_NODE(CircleMatrixSetDiag, BuiltinOperator_MATRIX_SET_DIAG, BuiltinOptions_MatrixSetDiagOptions)
+CIRCLE_NODE(CircleMaximum, BuiltinOperator_MAXIMUM, BuiltinOptions_MaximumMinimumOptions)
+CIRCLE_NODE(CircleMean, BuiltinOperator_MEAN, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleMinimum, BuiltinOperator_MINIMUM, BuiltinOptions_MaximumMinimumOptions)
+CIRCLE_NODE(CircleMirrorPad, BuiltinOperator_MIRROR_PAD, BuiltinOptions_MirrorPadOptions)
+CIRCLE_NODE(CircleMul, BuiltinOperator_MUL, BuiltinOptions_MulOptions)
+CIRCLE_NODE(CircleNeg, BuiltinOperator_NEG, BuiltinOptions_NegOptions)
+CIRCLE_NODE(CircleNonMaxSuppressionV4, BuiltinOperator_NON_MAX_SUPPRESSION_V4, BuiltinOptions_NonMaxSuppressionV4Options)
+CIRCLE_NODE(CircleNonMaxSuppressionV5, BuiltinOperator_NON_MAX_SUPPRESSION_V5, BuiltinOptions_NonMaxSuppressionV5Options)
+CIRCLE_NODE(CircleNotEqual, BuiltinOperator_NOT_EQUAL, BuiltinOptions_NotEqualOptions)
+CIRCLE_NODE(CircleOneHot, BuiltinOperator_ONE_HOT, BuiltinOptions_OneHotOptions)
+CIRCLE_NODE(CirclePack, BuiltinOperator_PACK, BuiltinOptions_PackOptions)
+CIRCLE_NODE(CirclePad, BuiltinOperator_PAD, BuiltinOptions_PadOptions)
+CIRCLE_NODE(CirclePadV2, BuiltinOperator_PADV2, BuiltinOptions_PadV2Options)
+CIRCLE_NODE(CirclePow, BuiltinOperator_POW, BuiltinOptions_PowOptions)
+CIRCLE_NODE(CirclePRelu, BuiltinOperator_PRELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleQuantize, BuiltinOperator_QUANTIZE, BuiltinOptions_QuantizeOptions)
+CIRCLE_NODE(CircleRange, BuiltinOperator_RANGE, BuiltinOptions_RangeOptions)
+CIRCLE_NODE(CircleRank, BuiltinOperator_RANK, BuiltinOptions_RankOptions)
+CIRCLE_NODE(CircleReduceAny, BuiltinOperator_REDUCE_ANY, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceMax, BuiltinOperator_REDUCE_MAX, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceMin, BuiltinOperator_REDUCE_MIN, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleReduceProd, BuiltinOperator_REDUCE_PROD, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleRelu, BuiltinOperator_RELU, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleRelu6, BuiltinOperator_RELU6, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleReluN1To1, BuiltinOperator_RELU_N1_TO_1, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleReshape, BuiltinOperator_RESHAPE, BuiltinOptions_ReshapeOptions)
+CIRCLE_NODE(CircleResizeBilinear, BuiltinOperator_RESIZE_BILINEAR, BuiltinOptions_ResizeBilinearOptions)
+CIRCLE_NODE(CircleResizeNearestNeighbor, BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, BuiltinOptions_ResizeNearestNeighborOptions)
+CIRCLE_NODE(CircleReverseSequence, BuiltinOperator_REVERSE_SEQUENCE, BuiltinOptions_ReverseSequenceOptions)
+CIRCLE_NODE(CircleReverseV2, BuiltinOperator_REVERSE_V2, BuiltinOptions_ReverseV2Options)
+CIRCLE_NODE(CircleRound, BuiltinOperator_ROUND, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleRsqrt, BuiltinOperator_RSQRT, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleScatterNd, BuiltinOperator_SCATTER_ND, BuiltinOptions_ScatterNdOptions)
+CIRCLE_NODE(CircleSegmentSum, BuiltinOperator_SEGMENT_SUM, BuiltinOptions_SegmentSumOptions)
+CIRCLE_NODE(CircleSelect, BuiltinOperator_SELECT, BuiltinOptions_SelectOptions)
+CIRCLE_NODE(CircleSelectV2, BuiltinOperator_SELECT_V2, BuiltinOptions_SelectV2Options)
+CIRCLE_NODE(CircleShape, BuiltinOperator_SHAPE, BuiltinOptions_ShapeOptions)
+CIRCLE_NODE(CircleSin, BuiltinOperator_SIN, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleSlice, BuiltinOperator_SLICE, BuiltinOptions_SliceOptions)
+CIRCLE_NODE(CircleSoftmax, BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions)
+CIRCLE_NODE(CircleSpaceToBatchND, BuiltinOperator_SPACE_TO_BATCH_ND, BuiltinOptions_SpaceToBatchNDOptions)
+CIRCLE_NODE(CircleSpaceToDepth, BuiltinOperator_SPACE_TO_DEPTH, BuiltinOptions_SpaceToDepthOptions)
+CIRCLE_NODE(CircleSparseToDense, BuiltinOperator_SPARSE_TO_DENSE, BuiltinOptions_SparseToDenseOptions)
+CIRCLE_NODE(CircleSplit, BuiltinOperator_SPLIT, BuiltinOptions_SplitOptions)
+CIRCLE_NODE(CircleSplitV, BuiltinOperator_SPLIT_V, BuiltinOptions_SplitVOptions)
+CIRCLE_NODE(CircleSqrt, BuiltinOperator_SQRT, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleSquare, BuiltinOperator_SQUARE, BuiltinOptions_SquareOptions)
+CIRCLE_NODE(CircleSquaredDifference, BuiltinOperator_SQUARED_DIFFERENCE, BuiltinOptions_SquaredDifferenceOptions)
+CIRCLE_NODE(CircleSqueeze, BuiltinOperator_SQUEEZE, BuiltinOptions_SqueezeOptions)
+CIRCLE_NODE(CircleStridedSlice, BuiltinOperator_STRIDED_SLICE, BuiltinOptions_StridedSliceOptions)
+CIRCLE_NODE(CircleSub, BuiltinOperator_SUB, BuiltinOptions_SubOptions)
+CIRCLE_NODE(CircleSum, BuiltinOperator_SUM, BuiltinOptions_ReducerOptions)
+CIRCLE_NODE(CircleSVDF, BuiltinOperator_SVDF, BuiltinOptions_SVDFOptions)
+CIRCLE_NODE(CircleTanh, BuiltinOperator_TANH, BuiltinOptions_NONE)
+CIRCLE_NODE(CircleTile, BuiltinOperator_TILE, BuiltinOptions_TileOptions)
+CIRCLE_NODE(CircleTopKV2, BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options)
+CIRCLE_NODE(CircleTranspose, BuiltinOperator_TRANSPOSE, BuiltinOptions_TransposeOptions)
+CIRCLE_NODE(CircleTransposeConv, BuiltinOperator_TRANSPOSE_CONV, BuiltinOptions_TransposeConvOptions)
+CIRCLE_NODE(CircleUnidirectionalSequenceLSTM, BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOptions_UnidirectionalSequenceLSTMOptions)
+CIRCLE_NODE(CircleUnique, BuiltinOperator_UNIQUE, BuiltinOptions_UniqueOptions)
+CIRCLE_NODE(CircleUnpack, BuiltinOperator_UNPACK, BuiltinOptions_UnpackOptions)
+CIRCLE_NODE(CircleWhere, BuiltinOperator_WHERE, BuiltinOptions_WhereOptions)
+CIRCLE_NODE(CircleWhile, BuiltinOperator_WHILE, BuiltinOptions_WhileOptions)
+CIRCLE_NODE(CircleZerosLike, BuiltinOperator_ZEROS_LIKE, BuiltinOptions_ZerosLikeOptions)
+// Circle Only
+CIRCLE_NODE(CircleBCQFullyConnected, BuiltinOperator_BCQ_FULLY_CONNECTED, BuiltinOptions_BCQFullyConnectedOptions)
+CIRCLE_NODE(CircleBCQGather, BuiltinOperator_BCQ_GATHER, BuiltinOptions_BCQGatherOptions)
+CIRCLE_NODE(CircleInstanceNorm, BuiltinOperator_INSTANCE_NORM, BuiltinOptions_InstanceNormOptions)
+// Virtual node(s)
+CIRCLE_VNODE(CircleBidirectionalSequenceLSTMOut)
+CIRCLE_VNODE(CircleConst)
+CIRCLE_VNODE(CircleInput)
+CIRCLE_VNODE(CircleOutput)
+CIRCLE_VNODE(CircleOutputDummy)
+CIRCLE_VNODE(CircleOutputExclude)
+CIRCLE_VNODE(CircleCustomOut)
+CIRCLE_VNODE(CircleIfOut)
+CIRCLE_VNODE(CircleNonMaxSuppressionV4Out)
+CIRCLE_VNODE(CircleNonMaxSuppressionV5Out)
+CIRCLE_VNODE(CircleSplitOut)
+CIRCLE_VNODE(CircleSplitVOut)
+CIRCLE_VNODE(CircleTopKV2Out)
+CIRCLE_VNODE(CircleUniqueOut)
+CIRCLE_VNODE(CircleUnpackOut)
+CIRCLE_VNODE(CircleVariable)
+CIRCLE_VNODE(CircleWhileOut)
index 615402aa8b9ba98facd9c4582b5f9317f7023e01..b3bb850cc6fde6221f6d238524ee3a6849ab6cd0 100644 (file)
@@ -67,6 +67,9 @@ public:
   luci::SparsityParam *sparsityparam(void) const { return _sparsityparam; }
   void sparsityparam(luci::SparsityParam *sp) { _sparsityparam = sp; }
 
+  bool is_variable(void) const { return _is_variable; }
+  void is_variable(bool v) { _is_variable = v; }
+
 private:
   std::string _name;
 
@@ -77,6 +80,8 @@ private:
   luci::CircleConst *_content = nullptr;
   luci::CircleQuantParam *_quantparam = nullptr;
   luci::SparsityParam *_sparsityparam = nullptr;
+
+  bool _is_variable = false;
 };
 
 class CircleTensorContext
@@ -145,6 +150,8 @@ void allocateCircleTensorInfo(CircleNode *node, CircleTensorContext &ctx)
   tensor_info.quantparam(node->quantparam());
   tensor_info.sparsityparam(node->sparsityparam());
 
+  tensor_info.is_variable(dynamic_cast<luci::CircleVariable *>(node) != nullptr);
+
   set_tensor_index(node, tensor_index);
 
   ctx.emplace_back(tensor_info);
@@ -592,9 +599,11 @@ void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &buil
   auto buffer_id = get_buffer_id(builder, md, info.content());
 
   auto name_offset = builder.CreateString(info.name());
-  auto tensor_offset =
-    CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam,
-                 /*is_variable*/ false, sparsityparam, shape_signature_offset);
+
+  auto is_variable = info.is_variable();
+
+  auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset,
+                                    quantparam, is_variable, sparsityparam, shape_signature_offset);
   gd._tensors.push_back(tensor_offset);
 }
 
index a945eecf7726a62819546d8542fbbb502b64c8f4..136a8ac490e1bd95baf66bf52a03bd4777385a2c 100644 (file)
@@ -23,7 +23,7 @@
 #include <luci/IR/ExecutionPlanTable.h>
 
 #include <vector>
-
+#include <string>
 #include <unordered_map>
 #include <map>
 
@@ -131,8 +131,8 @@ struct SerializedModelData final
    * @param builtin_code
    * @return idx of opcode in table of opcodes (see schema)
    */
-  uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code, const int32_t op_version);
-  uint32_t registerCustomOpcode(const std::string &custom_op);
+  uint32_t registerBuiltinOpcode(circle::BuiltinOperator builtin_code,
+                                 const std::string &custom_code, const int32_t op_version);
 };
 
 // Prerequisites for circle::Model object creation
index 6630cab9f14eff5fe62e735133b0285070c92ddf..1b2db23ae2a1f5826e57fd91ff80326dcde79dd5 100644 (file)
@@ -12,13 +12,14 @@ target_include_directories(luci_import PUBLIC include)
 target_link_libraries(luci_import PUBLIC luci_lang)
 target_link_libraries(luci_import PUBLIC luci_profile)
 target_link_libraries(luci_import PUBLIC luci_plan)
-target_link_libraries(luci_import PUBLIC mio_circle)
+target_link_libraries(luci_import PUBLIC mio_circle04)
 target_link_libraries(luci_import PRIVATE luci_env)
 target_link_libraries(luci_import PRIVATE luci_log)
 target_link_libraries(luci_import PRIVATE luci_logex)
 target_link_libraries(luci_import PRIVATE nncc_common)
 target_link_libraries(luci_import PRIVATE locop)
 target_link_libraries(luci_import PRIVATE oops)
+target_link_libraries(luci_import PRIVATE mio_circle04_helper)
 install(TARGETS luci_import DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
@@ -32,7 +33,3 @@ nnas_find_package(GTest REQUIRED)
 GTest_AddTest(luci_import_test ${TESTS})
 target_include_directories(luci_import_test PRIVATE src)
 target_link_libraries(luci_import_test luci_import)
-target_link_libraries(luci_import_test oops)
-target_link_libraries(luci_import_test luci_plan)
-target_link_libraries(luci_import_test luci_lang)
-target_link_libraries(luci_import_test mio_circle)
index fb38ba90bbbdc26961355959414ef2d9791e269e..a0519f661e40ec198a723754cc3ec6f553ec6c5a 100644 (file)
 namespace luci
 {
 
-bool is_valid(const circle::OperatorCodeT &opcode);
-bool is_valid(const circle::OperatorCode *opcode);
-
-bool is_custom(const circle::OperatorCodeT &opcode);
-bool is_custom(const circle::OperatorCode *opcode);
-
-std::string opcode_name(const circle::OperatorCodeT &opcode);
-std::string opcode_name(const circle::OperatorCode *opcode);
-
-const char *tensor_name(const circle::TensorT &tensor);
 const char *tensor_name(const circle::Tensor *tensor);
-
-const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor);
 const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor);
 
 loco::DataType luci_datatype(circle::TensorType type);
@@ -57,14 +45,13 @@ MirrorPadMode luci_mirrorpad_mode(const circle::MirrorPadMode mode);
 luci::CircleFullyConnected::WeightsFormat
 luci_weights_format(const circle::FullyConnectedOptionsWeightsFormat weights_format);
 std::unique_ptr<CircleQuantParam>
-luci_quantparam(const circle::QuantizationParametersT *quantization);
-std::unique_ptr<CircleQuantParam>
 luci_quantparam(const circle::QuantizationParameters *quantization);
 
 /// @brief Copy common tensor attributes such as name, type, etc. to node.
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node);
 void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node);
 
+std::string fb_string2std_string(const flatbuffers::String *fb_str);
+
 /**
  * @brief Wrapper to use flatbuffers::Vector pointer as std::vector entity
  */
@@ -101,13 +88,6 @@ template <typename T> VectorWrapper<T> wrap(const flatbuffers::Vector<T> *vec)
  */
 class CircleReader
 {
-private: // unpack API
-  using CircleBuffers_t = std::vector<std::unique_ptr<circle::BufferT>>;
-  using CircleTensors_t = std::vector<std::unique_ptr<circle::TensorT>>;
-  using CircleOperators_t = std::vector<std::unique_ptr<circle::OperatorT>>;
-  using CircleOperatorCodes_t = std::vector<std::unique_ptr<circle::OperatorCodeT>>;
-  using CircleMetadata_t = std::vector<std::unique_ptr<circle::MetadataT>>;
-
 private: // direct API
   using CircleBuffers = VectorWrapper<flatbuffers::Offset<circle::Buffer>>;
   using CircleTensors = VectorWrapper<flatbuffers::Offset<circle::Tensor>>;
@@ -115,40 +95,21 @@ private: // direct API
   using CircleOperatorCodes = VectorWrapper<flatbuffers::Offset<circle::OperatorCode>>;
   using CircleMetadataSet = VectorWrapper<flatbuffers::Offset<circle::Metadata>>;
 
-  using CircleSubGraphsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::SubGraph>>;
-  using CircleTensorsPtr_t = flatbuffers::Vector<flatbuffers::Offset<circle::Tensor>>;
-
 public:
   CircleReader() = default;
 
-public: // unpack API
-  const CircleOperatorCodes_t &opcodes() const { return _model->operator_codes; }
-  const CircleBuffers_t &buffers() const { return _model->buffers; }
-  const CircleTensors_t &tensors() const { return _current_subgraph->tensors; }
-  const CircleOperators_t &operators() const { return _current_subgraph->operators; }
-  const std::vector<int32_t> &inputs() const { return _current_subgraph->inputs; }
-  const std::vector<int32_t> &outputs() const { return _current_subgraph->outputs; }
-  const std::string &name() const { return _current_subgraph->name; }
-  const circle::DataFormat &data_format() const { return _current_subgraph->data_format; }
-  const CircleMetadata_t &metadata() const { return _model->metadata; }
-
-  const CircleTensorsPtr_t *tensors_ptr() const { return _tensors_ptr; }
-
-  uint32_t num_subgraph() const { return _model->subgraphs.size(); }
-
-  circle::BuiltinOperator builtin_code(const circle::OperatorT &op) const;
-  std::string opcode_name(const circle::OperatorT &op) const;
-
 public: // direct API
-  CircleOperatorCodes native_opcodes() const { return wrap(_native_model->operator_codes()); }
-  CircleBuffers native_buffers() const { return wrap(_native_model->buffers()); }
-  CircleTensors native_tensors() const { return wrap(_native_subgraph->tensors()); }
-  CircleOperators native_operators() const { return wrap(_native_subgraph->operators()); }
-  VectorWrapper<int32_t> native_inputs() const { return wrap(_native_subgraph->inputs()); }
-  VectorWrapper<int32_t> native_outputs() const { return wrap(_native_subgraph->outputs()); }
-  std::string native_name() const { return _native_subgraph->name()->str(); }
-  circle::DataFormat native_data_format() const { return _native_subgraph->data_format(); }
-  CircleMetadataSet native_metadata() const { return wrap(_native_model->metadata()); }
+  CircleOperatorCodes opcodes() const { return wrap(_model->operator_codes()); }
+  CircleBuffers buffers() const { return wrap(_model->buffers()); }
+  CircleTensors tensors() const { return wrap(_current_subgraph->tensors()); }
+  CircleOperators operators() const { return wrap(_current_subgraph->operators()); }
+  VectorWrapper<int32_t> inputs() const { return wrap(_current_subgraph->inputs()); }
+  VectorWrapper<int32_t> outputs() const { return wrap(_current_subgraph->outputs()); }
+  std::string name() const { return fb_string2std_string(_current_subgraph->name()); }
+  circle::DataFormat data_format() const { return _current_subgraph->data_format(); }
+  CircleMetadataSet metadata() const { return wrap(_model->metadata()); }
+
+  uint32_t num_subgraph() const { return wrap(_model->subgraphs()).size(); }
 
   circle::BuiltinOperator builtin_code(const circle::Operator *op) const;
   std::string opcode_name(const circle::Operator *op) const;
@@ -158,12 +119,8 @@ public:
   bool select_subgraph(uint32_t subgraph);
 
 private:
-  std::unique_ptr<const circle::ModelT> _model;
-  const circle::SubGraphT *_current_subgraph{nullptr};
-
-  const circle::Model *_native_model{nullptr};
-  const CircleTensorsPtr_t *_tensors_ptr{nullptr};
-  const circle::SubGraph *_native_subgraph{nullptr};
+  const circle::Model *_model{nullptr};
+  const circle::SubGraph *_current_subgraph{nullptr};
 };
 
 } // namespace luci
index b8dc22fdd0c545e3d357914e12a846f839c99315..93e34a56b993d3adb666656792c4c9a28689a762 100644 (file)
@@ -18,6 +18,7 @@
 #define __LUCI_IMPORT_GRAPH_BUILDER_REGISTRY_H__
 
 #include "GraphBuilderBase.h"
+#include "NodeBuilder.h"
 
 #include <map>
 
@@ -32,6 +33,11 @@ struct GraphBuilderSource
    * @brief Returns registered GraphBuilder pointer for operator (nullptr if not present)
    */
   virtual const GraphBuilderBase *lookup(const circle::BuiltinOperator &op) const = 0;
+
+  /**
+   * @brief Returns registered NodeBuilderBase pointer for type (nullptr if not present)
+   */
+  virtual const NodeBuilderBase *lookup(const NodeBuilderType type) const = 0;
 };
 
 /**
@@ -61,6 +67,17 @@ public:
     return _builder_map.at(op).get();
   }
 
+  /**
+   * @brief Returns registered NodeBuilderBase pointer for type or nullptr if not registered
+   */
+  const NodeBuilderBase *lookup(const NodeBuilderType type) const final
+  {
+    if (_node_builders.find(type) == _node_builders.end())
+      return (_parent == nullptr) ? nullptr : _parent->lookup(type);
+
+    return _node_builders.at(type).get();
+  }
+
   static GraphBuilderRegistry &get()
   {
     static GraphBuilderRegistry me;
@@ -73,11 +90,17 @@ public:
     _builder_map[op] = std::move(builder);
   }
 
+  void add(std::unique_ptr<NodeBuilderBase> &&builder)
+  {
+    _node_builders[builder->builder_type()] = std::move(builder);
+  }
+
 private:
   const GraphBuilderSource *_parent = nullptr;
 
 private:
   std::map<const circle::BuiltinOperator, std::unique_ptr<GraphBuilderBase>> _builder_map;
+  std::map<const NodeBuilderType, std::unique_ptr<NodeBuilderBase>> _node_builders;
 };
 
 } // namespace luci
diff --git a/compiler/luci/import/include/luci/Import/NodeBuilder.h b/compiler/luci/import/include/luci/Import/NodeBuilder.h
new file mode 100644 (file)
index 0000000..440b491
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_NODE_BUILDER_H__
+#define __LUCI_IMPORT_NODE_BUILDER_H__
+
+#include "GraphBuilderContext.h"
+#include "GraphBuilderBase.h"
+
+#include <mio/circle/schema_generated.h>
+
+namespace luci
+{
+
+/**
+ * @brief Tensor types which requires separated node
+ */
+enum class NodeBuilderType
+{
+  BUFFER,
+  // TODO Extend this struct here if needed to add new type of NodeBuilderBase
+};
+
+/**
+ * @brief Creates nodes from given Tensor and context
+ */
+class NodeBuilderBase
+{
+public:
+  virtual CircleNode *build(TensorIndex tensor_idx, GraphBuilderContext *context) const = 0;
+  virtual NodeBuilderType builder_type() const = 0;
+};
+
+/**
+ * @brief Placeholder for builders of tensors with different types
+ */
+template <NodeBuilderType Type> class TypedNodeBuilder : public NodeBuilderBase
+{
+public:
+  NodeBuilderType builder_type() const final { return Type; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_NODE_BUILDER_H__
index f7d22e7aae4c8b0d1f433550e8eebfb7be988791..7a5045ede71f3555bf2f4183caf045b1be7a7fc2 100644 (file)
 #include "Nodes/CircleStridedSlice.h"
 #include "Nodes/CircleSub.h"
 #include "Nodes/CircleSum.h"
+#include "Nodes/CircleSVDF.h"
 #include "Nodes/CircleTanh.h"
 #include "Nodes/CircleTile.h"
 #include "Nodes/CircleTopKV2.h"
 #include "Nodes/CircleUnidirectionalSequenceLSTM.h"
 #include "Nodes/CircleUnique.h"
 #include "Nodes/CircleUnpack.h"
+#include "Nodes/CircleVariable.h"
 #include "Nodes/CircleWhere.h"
 #include "Nodes/CircleWhile.h"
 #include "Nodes/CircleZerosLike.h"
index 7d4f10a594f327d843aab80c170540f61eaaeaaf..9e50ddbdec34f203cdfd9dcded344dd00ee46b49 100644 (file)
 #ifndef __LUCI_IMPORT_OP_CIRCLE_CONST_H__
 #define __LUCI_IMPORT_OP_CIRCLE_CONST_H__
 
-#include "luci/Import/GraphBuilderContext.h"
+#include "luci/Import/NodeBuilder.h"
 
 #include <luci/IR/Nodes/CircleConst.h>
 
-/*
- * @note  Circle does not have Const operator.
- *        Methods here provide helper that creates CircleConst from
- *        Tensor and Buffer in circle flatbuffer file.
- */
-
 namespace luci
 {
 
-CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index);
+/**
+ * @brief Builder creates CircleConst node from Tensor with buffer.
+ */
+class CircleConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+  CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
 
 } // namespace luci
 
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h b/compiler/luci/import/include/luci/Import/Nodes/CircleSVDF.h
new file mode 100644 (file)
index 0000000..a91f660
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
+#define __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
+
+#include "luci/Import/GraphBuilder.h"
+
+namespace luci
+{
+
+class CircleSVDFBuilder : public GraphBuilder
+{
+public:
+  bool validate(const ValidateArgs &args) const final;
+
+private:
+  CircleNode *build_node(const circle::OperatorT &op, const std::vector<CircleNode *> &inputs,
+                         loco::Graph *graph) const final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_SVDF_H__
diff --git a/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h b/compiler/luci/import/include/luci/Import/Nodes/CircleVariable.h
new file mode 100644 (file)
index 0000000..4d8961f
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
+#define __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
+
+#include "luci/Import/GraphBuilderContext.h"
+
+#include <luci/IR/Nodes/CircleVariable.h>
+
+/*
+ * @note  Circle does not have node for variable tensor
+ *        Methods here provide helper that creates CircleVariable from
+ *        Tensor having is_variable true value.
+ */
+
+namespace luci
+{
+
+CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index);
+
+} // namespace luci
+
+#endif // __LUCI_IMPORT_OP_CIRCLE_VARIABLE_H__
index 42dcebdaa85a79717a033a7d9f33da81f48ee51a..9c1fe73560613563c685a6e8d4914fdb57b85d4f 100644 (file)
 namespace
 {
 
-uint32_t read_u32(const std::vector<uint8_t> &buffer, uint32_t idx)
+template <typename VECTORTYPE> uint32_t read_u32(const VECTORTYPE &buffer, uint32_t idx)
 {
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
   uint32_t val = 0;
   val += (buffer.at(idx + 0) << 0 * 8);
   val += (buffer.at(idx + 1) << 1 * 8);
@@ -37,9 +39,11 @@ namespace
 {
 
 // 'source_table' is decoded to std::map<uint32_t, std::string> format.
-const std::map<uint32_t, std::string>
-decoded_source_table(const std::vector<uint8_t> &source_table_data)
+template <typename VECTORTYPE>
+const std::map<uint32_t, std::string> decoded_source_table(const VECTORTYPE &source_table_data)
 {
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
   std::map<uint32_t, std::string> source_id_name_map;
   uint32_t idx = 0;
 
@@ -86,9 +90,11 @@ decoded_source_table(const std::vector<uint8_t> &source_table_data)
 }
 
 // 'op_table' is decoded to std::map<uint32_t, std::set<uint32_t>> format.
-const std::map<uint32_t, std::set<uint32_t>>
-decoded_op_table(const std::vector<uint8_t> &op_table_data)
+template <typename VECTORTYPE>
+const std::map<uint32_t, std::set<uint32_t>> decoded_op_table(const VECTORTYPE &op_table_data)
 {
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
   std::map<uint32_t, std::set<uint32_t>> node_source_ids_map;
   uint32_t idx = 0;
 
@@ -135,9 +141,11 @@ decoded_op_table(const std::vector<uint8_t> &op_table_data)
 }
 
 // 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format.
-const luci::ExecutionPlanTable
-decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data)
+template <typename VECTORTYPE>
+const luci::ExecutionPlanTable decoded_execution_plan(const VECTORTYPE &execution_plan_data)
 {
+  static_assert(std::is_same<typename VECTORTYPE::value_type, uint8_t>::value, "Types mismatch!");
+
   luci::ExecutionPlanTable execution_plan_table;
   uint32_t idx = 0;
 
@@ -156,6 +164,10 @@ decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data)
     idx += sizeof(uint32_t);
 
     uint32_t size = read_u32(execution_plan_data, idx);
+
+    if (size == 0)
+      throw std::runtime_error("Op table decode error : empty execution plan entry");
+
     idx += sizeof(uint32_t);
 
     if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
@@ -190,19 +202,22 @@ namespace luci
 
 CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
 {
-  const auto &metadata = reader.metadata();
+  const auto metadata = reader.metadata();
   for (uint32_t i = 0; i < metadata.size(); ++i)
   {
-    const circle::MetadataT &meta = *metadata[i];
+    const auto *meta = metadata[i];
+    assert(meta != nullptr);
 
-    assert(meta.buffer < reader.buffers().size());
-    const std::vector<uint8_t> &buffer = reader.buffers()[meta.buffer]->data;
+    assert(meta->buffer() < reader.buffers().size());
+    assert(reader.buffers()[meta->buffer()] != nullptr);
+    const auto buffer = luci::wrap(reader.buffers()[meta->buffer()]->data());
 
-    if (meta.name.compare("ONE_op_table") == 0)
+    assert(meta->name() != nullptr);
+    if (meta->name()->str().compare("ONE_op_table") == 0)
       _op_table = decoded_op_table(buffer);
-    else if (meta.name.compare("ONE_source_table") == 0)
+    else if (meta->name()->str().compare("ONE_source_table") == 0)
       _source_table = decoded_source_table(buffer);
-    else if (meta.name.compare("ONE_execution_plan_table") == 0)
+    else if (meta->name()->str().compare("ONE_execution_plan_table") == 0)
       _execution_plan_table = decoded_execution_plan(buffer);
   }
 }
index 14917ba06f4bd80efc73e9ac368acf4bcfe2a66f..a42c3f91335a568011357292739b3af26655dc0a 100644 (file)
@@ -16,6 +16,9 @@
 
 #include "luci/Import/CircleReader.h"
 
+#include <mio_circle/Helper.h>
+
+#include <algorithm>
 #include <memory>
 #include <sstream>
 #include <string>
 namespace luci
 {
 
-bool is_valid(const circle::OperatorCodeT &opcode)
-{
-  circle::BuiltinOperator code = opcode.builtin_code;
-  return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_valid(const circle::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (circle::BuiltinOperator_MIN <= code && code <= circle::BuiltinOperator_MAX);
-}
-
-bool is_custom(const circle::OperatorCodeT &opcode)
-{
-  circle::BuiltinOperator code = opcode.builtin_code;
-  return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-bool is_custom(const circle::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return (code == circle::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const circle::OperatorCodeT &opcode)
-{
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    if (opcode.custom_code.empty())
-      return "(invalid custom)";
-
-    return opcode.custom_code;
-  }
-
-  circle::BuiltinOperator code = opcode.builtin_code;
-  return circle::EnumNameBuiltinOperator(code);
-}
-
-std::string opcode_name(const circle::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    auto custom_code = opcode->custom_code()->str();
-    if (custom_code.empty())
-      return "(invalid custom)";
-
-    return custom_code;
-  }
-
-  circle::BuiltinOperator code = opcode->builtin_code();
-  return circle::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_name(const circle::TensorT &tensor)
-{
-  static const char *kEmptyTensorName = "(noname)";
-
-  if (!tensor.name.empty())
-    return tensor.name.c_str();
-
-  return kEmptyTensorName;
-}
-
 const char *tensor_name(const circle::Tensor *tensor)
 {
   assert(tensor != nullptr);
 
-  static const char *kEmptyTensorName = "(noname)";
-  const auto tensor_name = tensor->name()->c_str();
-
-  if (!std::string(tensor_name).empty())
-    return tensor_name;
+  if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+    return "(noname)";
 
-  return kEmptyTensorName;
-}
-
-const circle::QuantizationParametersT *tensor_quantization(const circle::TensorT &tensor)
-{
-  return tensor.quantization.get();
+  return tensor->name()->c_str();
 }
 
 const circle::QuantizationParameters *tensor_quantization(const circle::Tensor *tensor)
@@ -334,41 +248,6 @@ std::unique_ptr<SparsityParam> luci_sparsityparam(const circle::SparsityParamete
   return luci_sparsityparam(&sparsity);
 }
 
-void copy_tensor_attributes(const circle::TensorT &tensor, CircleNode *node)
-{
-  node->name(tensor_name(tensor));
-  node->dtype(luci_datatype(tensor.type));
-
-  assert(tensor.shape_signature.size() == 0 ||
-         tensor.shape_signature.size() == tensor.shape.size());
-
-  std::vector<int32_t> dims = tensor.shape; // in NHWC
-  node->rank(dims.size());
-  for (uint32_t r = 0; r < dims.size(); ++r)
-  {
-    if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
-      node->dim(r).unset();
-    else
-      node->dim(r).set(dims[r]);
-  }
-
-  const auto *quantization = tensor.quantization.get();
-  if (quantization != nullptr)
-  {
-    auto quantparam = luci_quantparam(quantization);
-    if (quantparam)
-      node->quantparam(std::move(quantparam));
-  }
-
-  const auto *sparsity = tensor.sparsity.get();
-  if (sparsity != nullptr)
-  {
-    auto sparsityparam = luci_sparsityparam(sparsity);
-    if (sparsityparam)
-      node->sparsityparam(std::move(sparsityparam));
-  }
-}
-
 void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
 {
   assert(tensor != nullptr);
@@ -408,63 +287,60 @@ void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node)
   }
 }
 
-circle::BuiltinOperator CircleReader::builtin_code(const circle::OperatorT &op) const
+std::string fb_string2std_string(const flatbuffers::String *fb_str)
 {
-  const auto &op_codes = opcodes();
-  uint32_t index = op.opcode_index;
+  return fb_str == nullptr ? "" : fb_str->str();
+}
+
+circle::BuiltinOperator CircleReader::builtin_code(const circle::Operator *op) const
+{
+  assert(op != nullptr);
+
+  const auto op_codes = opcodes();
+  uint32_t index = op->opcode_index();
   assert(index < op_codes.size());
-  const circle::OperatorCodeT &opcode = *op_codes[index];
+  const auto opcode = op_codes[index];
+  assert(opcode != nullptr);
 
-  return opcode.builtin_code;
+  return mio::circle::builtin_code_neutral(opcode);
 }
 
-std::string CircleReader::opcode_name(const circle::OperatorT &op) const
+std::string CircleReader::opcode_name(const circle::Operator *op) const
 {
-  const auto &op_codes = opcodes();
-  uint32_t index = op.opcode_index;
-  assert(index < op_codes.size());
-  const circle::OperatorCodeT &opcode = *op_codes[index];
+  assert(op != nullptr);
 
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid: " << index << ")";
-    return oss.str();
-  }
+  const auto op_codes = opcodes();
+  uint32_t index = op->opcode_index();
+  assert(index < op_codes.size());
+  const auto opcode = op_codes[index];
 
-  return ::luci::opcode_name(opcode);
+  return mio::circle::opcode_name(opcode);
 }
 
 bool CircleReader::parse(const circle::Model *model)
 {
   assert(model != nullptr);
 
-  _model.reset(model->UnPack());
-
   // for direct pointer access
-  _native_model = model;
+  _model = model;
 
   return true;
 }
 
 bool CircleReader::select_subgraph(uint32_t sgindex)
 {
-  if (_model->subgraphs.size() <= sgindex)
+  if (num_subgraph() <= sgindex)
   {
     assert(false);
     return false;
   }
 
-  _current_subgraph = _model->subgraphs[sgindex].get();
-
   // for direct pointer access
-  auto subgraphs = _native_model->subgraphs();
+  auto subgraphs = _model->subgraphs();
   assert(subgraphs != nullptr);
 
-  _native_subgraph = subgraphs->Get(sgindex);
-  assert(_native_subgraph != nullptr);
-
-  _tensors_ptr = _native_subgraph->tensors();
+  _current_subgraph = subgraphs->Get(sgindex);
+  assert(_current_subgraph != nullptr);
 
   return true;
 }
index 356501c2fa36af95c80a6db4de79175c4795d2f3..59a08b5469309efcc00581b8221142aefb7cb0c2 100644 (file)
@@ -29,10 +29,9 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext
 
   const std::vector<int32_t> &inputs = op.inputs;
   const std::vector<int32_t> &outputs = op.outputs;
-  const auto &tensors = context->reader()->tensors();
-  const auto &opcodes = context->reader()->opcodes();
-  auto tensors_ptr = context->reader()->tensors_ptr();
-  assert(tensors_ptr != nullptr);
+  const auto tensors = context->reader()->tensors();
+  const auto opcodes = context->reader()->opcodes();
+  assert(!tensors.null());
 
   std::vector<CircleNode *> input_nodes;
   for (const int32_t input_tensor_index : inputs)
@@ -60,16 +59,18 @@ CircleNode *GraphBuilder::build(const circle::OperatorT &op, GraphBuilderContext
   // Set up node parameters.
   assert(outputs.size() == 1);
   {
-    const circle::TensorT &output_tensor = *tensors[outputs[0]];
+    const auto output_tensor = tensors[outputs[0]];
+    assert(output_tensor != nullptr);
     copy_tensor_attributes(output_tensor, node);
     // mark shape_status
-    if (tensors_ptr->Get(outputs[0])->shape() == nullptr)
+    if (output_tensor->shape() == nullptr)
       node->shape_status(ShapeStatus::NOSHAPE);
     else
       node->shape_status(ShapeStatus::VALID);
 
     // mark operator version
-    node->op_version(opcodes[op.opcode_index].get()->version);
+    assert(opcodes[op.opcode_index] != nullptr);
+    node->op_version(opcodes[op.opcode_index]->version());
   }
 
   // Register node's only output.
index be553f4c09912449ec7f4cf0adcaf102f74295a5..4df8d1e5a778507630c90c527581ede966e60129 100644 (file)
@@ -30,10 +30,9 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
 
   const std::vector<int32_t> &inputs = op.inputs;
   const std::vector<int32_t> &outputs = op.outputs;
-  const auto &tensors = context->reader()->tensors();
-  const auto &opcodes = context->reader()->opcodes();
-  auto tensors_ptr = context->reader()->tensors_ptr();
-  assert(tensors_ptr != nullptr);
+  const auto tensors = context->reader()->tensors();
+  const auto opcodes = context->reader()->opcodes();
+  assert(!tensors.null());
 
   std::vector<CircleNode *> input_nodes;
   for (const int32_t input_tensor_index : inputs)
@@ -64,12 +63,14 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
   if (output_count > 0)
   {
     // Let's use attributes from output 0 for this node
-    const circle::TensorT &output_tensor = *tensors[outputs[0]];
+    const auto output_tensor = tensors[outputs[0]];
+    assert(output_tensor != nullptr);
     node->name(tensor_name(output_tensor));
-    node->dtype(luci_datatype(output_tensor.type));
+    node->dtype(luci_datatype(output_tensor->type()));
 
     // mark operator version
-    node->op_version(opcodes[op.opcode_index].get()->version);
+    assert(opcodes[op.opcode_index] != nullptr);
+    node->op_version(opcodes[op.opcode_index]->version());
 
     // NOTE We don't set quantization for multiple output nodes but to virtual outputs
   }
@@ -77,7 +78,8 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
   // Create virtual outputs of Virtual Output node(s)
   for (uint32_t n = 0; n < output_count; ++n)
   {
-    const circle::TensorT &output_tensor = *tensors[outputs[n]];
+    const auto output_tensor = tensors[outputs[n]];
+    assert(output_tensor != nullptr);
 
     BuildOutArgs boa(node, n);
     auto *nodeout = build_out(boa);
@@ -85,7 +87,7 @@ CircleNode *GraphBuilderMultiOutput::build(const circle::OperatorT &op,
     copy_tensor_attributes(output_tensor, nodeout);
     // NOTE name of CxxxOut nodes may have same name
     // mark shape_status
-    if (tensors_ptr->Get(outputs[n])->shape() == nullptr)
+    if (output_tensor->shape() == nullptr)
       nodeout->shape_status(ShapeStatus::NOSHAPE);
     else
       nodeout->shape_status(ShapeStatus::VALID);
index df07d9e483e4db3f947692e36cb4ef42c72d7d1a..fe2d830e955ef6289a1f88dac6ab4c0256021b6d 100644 (file)
@@ -131,6 +131,7 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceGraphBuilder);                              // 45
   CIRCLE_NODE(SUB, CircleSubGraphBuilder);                                                 // 41
   CIRCLE_NODE(SUM, CircleSumGraphBuilder);                                                 // 74
+  CIRCLE_NODE(SVDF, CircleSVDFBuilder);                                                    // 27
   CIRCLE_NODE(TANH, CircleTanhGraphBuilder);                                               // 28
   CIRCLE_NODE(TILE, CircleTileGraphBuilder);                                               // 69
   CIRCLE_NODE(TOPK_V2, CircleTopKV2GraphBuilder);                                          // 48
@@ -150,7 +151,6 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   // BuiltinOperator_LSH_PROJECTION = 15,
   // BuiltinOperator_LSTM = 16,
   // BuiltinOperator_RNN = 24,
-  // BuiltinOperator_SVDF = 27,
   // BuiltinOperator_CONCAT_EMBEDDINGS = 29,
   // BuiltinOperator_SKIP_GRAM = 30,
   // BuiltinOperator_CALL = 31,
@@ -161,6 +161,13 @@ GraphBuilderRegistry::GraphBuilderRegistry()
   // BuiltinOperator_ARG_MAX = 56,
   // BuiltinOperator_HARD_SWISH = 117,
   // BuiltinOperator_DENSIFY = 124,
+
+  // Register builders for nodes which not handles in builders registered above.
+#define CIRCLE_NODE(CLASS) add(std::make_unique<CLASS>())
+
+  CIRCLE_NODE(CircleConstNodeBuilder);
+
+#undef CIRCLE_NODE
 }
 
 } // namespace luci
index 3f7f78591720e7a75d6ce6cc2f32bc780f7ccdfb..15de03df2be2cf4155f856271004da9bfd7a4047 100644 (file)
@@ -23,6 +23,7 @@
 #include "luci/Import/GraphBuilderRegistry.h"
 #include "luci/Import/CircleReader.h"
 #include "luci/Import/Nodes/CircleConst.h"
+#include "luci/Import/Nodes/CircleVariable.h"
 
 #include <luci/IR/Module.h>
 #include <luci/IR/CircleNodes.h>
@@ -50,18 +51,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
 
   luci::GraphBuilderContext gb_context(graph, &reader, nodefinder.get(), tensoroutputs.get());
 
-  const auto &operators = reader.operators();
-  const auto &tensors = reader.tensors();
-  auto tensors_ptr = reader.tensors_ptr();
-  assert(tensors_ptr != nullptr);
+  const auto operators = reader.operators();
+  const auto tensors = reader.tensors();
+  assert(!tensors.null());
   auto circle_metadata = std::make_unique<luci::CircleImportMetadata>(reader);
 
   // build a cache to identify if a tensor is output of an operator
   // if this is set, we should not create a CircleConst for this tensor
   for (uint32_t i = 0; i < operators.size(); ++i)
   {
-    const circle::OperatorT &op = *operators[i];
-    const auto &outputs = op.outputs;
+    const auto op = operators[i];
+    assert(op != nullptr);
+    const auto outputs = luci::wrap(op->outputs());
 
     for (uint32_t j = 0; j < outputs.size(); ++j)
     {
@@ -77,10 +78,11 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
   {
     auto input_node = graph->nodes()->create<luci::CircleInput>();
     assert(input_node != nullptr);
-    const circle::TensorT &tensor = *tensors[input];
+    const auto tensor = tensors[input];
+    assert(tensor != nullptr);
 
     luci::copy_tensor_attributes(tensor, input_node);
-    if (tensors_ptr->Get(input)->shape() == nullptr)
+    if (tensor->shape() == nullptr)
       input_node->shape_status(luci::ShapeStatus::NOSHAPE);
     else
       input_node->shape_status(luci::ShapeStatus::VALID);
@@ -101,16 +103,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
     // Data type
     graph_input->dtype(input_node->dtype());
 
-    assert(tensor.shape_signature.size() == 0 ||
-           tensor.shape_signature.size() == tensor.shape.size());
+    const auto tensor_shape_signature = luci::wrap(tensor->shape_signature());
+    const auto tensor_shape = luci::wrap(tensor->shape());
+    assert(tensor_shape_signature.size() == 0 ||
+           tensor_shape_signature.size() == tensor_shape.size());
 
     // Shape of GraphInput
     auto input_shape = std::make_unique<loco::TensorShape>();
-    const std::vector<int32_t> &input_dims = tensor.shape; // in NHWC
+    const auto &input_dims = tensor_shape; // in NHWC
     input_shape->rank(input_dims.size());
     for (uint32_t r = 0; r < input_dims.size(); ++r)
     {
-      if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+      if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
         input_shape->dim(r).unset();
       else
         input_shape->dim(r).set(input_dims[r]);
@@ -118,15 +122,28 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
     graph_input->shape(std::move(input_shape));
   }
 
-  // Create CircleConst nodes for constant tensors.
+  // Create CircleNodes for constant tensors.
   // NOTE Origin is intentionally not provided for constants.
+  auto const_builder = source.lookup(luci::NodeBuilderType::BUFFER);
+  if (not const_builder)
+    throw oops::UserExn("Not supported", "tensor with buffer builder");
+
   for (uint32_t i = 0; i < tensors.size(); ++i)
   {
-    luci::CircleConst *const_node = luci::create_circleconst(&gb_context, i);
+    auto *const_node = const_builder->build(i, &gb_context);
     if (const_node != nullptr)
       nodefinder->enroll(i, const_node);
   }
 
+  // Create CircleVariable nodes for variable tensors
+  // TODO Add Origin if needed, skip for now
+  for (uint32_t i = 0; i < tensors.size(); ++i)
+  {
+    luci::CircleVariable *variable_node = luci::create_circlevariable(&gb_context, i);
+    if (variable_node != nullptr)
+      nodefinder->enroll(i, variable_node);
+  }
+
   // Import the operators.
   // Note that operators in model are stored in execution order. This means that when importing
   // an operator, its input operators have already been imported. We exploit this fact to set up
@@ -134,18 +151,23 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
   auto origin_table = circle_metadata->origin_table();
   for (uint32_t i = 0; i < operators.size(); ++i)
   {
-    const circle::OperatorT &op = *operators[i];
+    const auto op = operators[i];
+    assert(op != nullptr);
     circle::BuiltinOperator builtincode = reader.builtin_code(op);
 
     if (const auto *builder = source.lookup(builtincode))
     {
-      luci::GraphBuilder::ValidateArgs args(op, reader);
+      // create temporary unpack API obj
+      circle::OperatorT oper_t;
+      op->UnPackTo(&oper_t);
+
+      luci::GraphBuilder::ValidateArgs args(oper_t, reader);
       if (!builder->validate(args))
       {
         throw oops::UserExn("Invalid operator", reader.opcode_name(op));
       }
 
-      auto built_op = builder->build(op, &gb_context);
+      auto built_op = builder->build(oper_t, &gb_context);
       set_node_id(built_op, i);
       if (origin_table.find(i) != origin_table.end())
         add_origin(built_op, origin_table.at(i));
@@ -161,7 +183,8 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
   // graph outputs
   for (auto output : reader.outputs())
   {
-    const circle::TensorT &tensor = *tensors[output];
+    const auto tensor = tensors[output];
+    assert(tensor != nullptr);
 
     auto output_node = graph->nodes()->create<luci::CircleOutput>();
     assert(output_node != nullptr);
@@ -178,7 +201,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
       output_node->from(output_dummy);
 
       luci::copy_tensor_attributes(tensor, output_dummy);
-      if (tensors_ptr->Get(output)->shape() == nullptr)
+      if (tensor->shape() == nullptr)
         output_dummy->shape_status(luci::ShapeStatus::NOSHAPE);
       else
         output_dummy->shape_status(luci::ShapeStatus::VALID);
@@ -197,16 +220,18 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
     // Set GraphInputOutputIndex for graph
     output_node->index(graph_output->index());
 
-    assert(tensor.shape_signature.size() == 0 ||
-           tensor.shape_signature.size() == tensor.shape.size());
+    const auto tensor_shape_signature = luci::wrap(tensor->shape_signature());
+    const auto tensor_shape = luci::wrap(tensor->shape());
+    assert(tensor_shape_signature.size() == 0 ||
+           tensor_shape_signature.size() == tensor_shape.size());
 
     // Shape of Output
     auto output_shape = std::make_unique<loco::TensorShape>();
-    const std::vector<int32_t> &output_dims = tensor.shape; // in NHWC
+    const auto &output_dims = tensor_shape; // in NHWC
     output_shape->rank(output_dims.size());
     for (uint32_t r = 0; r < output_dims.size(); ++r)
     {
-      if (tensor.shape_signature.size() > 0 && tensor.shape_signature.at(r) == -1)
+      if (tensor_shape_signature.size() > 0 && tensor_shape_signature.at(r) == -1)
         output_shape->dim(r).unset();
       else
         output_shape->dim(r).set(output_dims[r]);
@@ -214,7 +239,7 @@ void convert_graph(const luci::GraphBuilderSource &source, luci::CircleReader &r
     graph_output->shape(std::move(output_shape));
 
     // Data type
-    auto dtype = luci::luci_datatype(tensor.type);
+    auto dtype = luci::luci_datatype(tensor->type());
     graph_output->dtype(dtype);
   }
 }
@@ -355,7 +380,12 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const
     {
       if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
       {
+        if (execution_plan_table.count(node_position) == 0)
+          continue;
+
         auto node_plan = execution_plan_table[node_position];
+        assert(node_plan.size() > 0);
+
         luci::add_execution_plan(
           circle_node,
           luci::CircleNodeExecutionPlan(
index d963b4d49c56288e1829fdc34defb11fb9636e36..91e4860ea9a55726c35b1b8fe7ac9447881d428a 100644 (file)
@@ -23,7 +23,7 @@
 #include <mio/circle/schema_generated.h>
 #include <flatbuffers/flatbuffers.h>
 
-TEST(TensorFlowLiteImport, Dummy)
+TEST(CircleImport, Dummy)
 {
   luci::Importer import;
 
@@ -68,6 +68,7 @@ struct BasicCircleModel
   {
     uint32_t id = model->operator_codes.size();
     model->operator_codes.push_back(std::make_unique<circle::OperatorCodeT>());
+    model->operator_codes[id]->deprecated_builtin_code = opcode;
     model->operator_codes[id]->builtin_code = opcode;
     model->operator_codes[id]->version = 1;
     return id;
@@ -179,7 +180,7 @@ struct SimpleRELUModel : public BasicCircleModel
 /**
  * This test checks that one op RELU model with execution plan is successfully imported
  */
-TEST(TensorFlowLiteImport, simple_plan)
+TEST(CircleImport, simple_plan)
 {
   SimpleRELUModel model;
   auto metadata_buffer_id = model.add_buffer();
@@ -240,7 +241,7 @@ TEST(TensorFlowLiteImport, simple_plan)
 /**
  * This test checks that model with incomplete execution plan is successfully imported
  */
-TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG)
+TEST(CircleImport, incomplete_plan_NEG)
 {
   SimpleRELUModel model;
   auto metadata_buffer_id = model.add_buffer();
@@ -287,7 +288,7 @@ TEST(TensorFlowLiteImport, DISABLED_incomplete_plan_NEG)
 /**
  * This test checks that corrupted execution plan induce exception
  */
-TEST(TensorFlowLiteImport, corrupted_plan_NEG)
+TEST(CircleImport, corrupted_plan_NEG)
 {
   SimpleRELUModel model;
   auto metadata_buffer_id = model.add_buffer();
@@ -309,3 +310,44 @@ TEST(TensorFlowLiteImport, corrupted_plan_NEG)
 
   ASSERT_ANY_THROW(import.importModule(model_ptr));
 }
+
+/**
+ * This test checks that empty execution plan entry induce exception
+ */
+TEST(CircleImport, corrupted_plan_entry_NEG)
+{
+  SimpleRELUModel model;
+  auto metadata_buffer_id = model.add_buffer();
+  model.add_plan_metadata(metadata_buffer_id);
+
+  model.add_plan_entry(metadata_buffer_id, 1, {100});
+
+  // add corrupted entry with 0 size
+  {
+    auto &buffer = model.model->buffers[metadata_buffer_id]->data;
+    auto old_size = buffer.size();
+
+    // Allocate space for new entry:
+    // 4 bytes for entry id
+    // 4 bytes for entry size
+    buffer.resize(old_size + 8);
+    uint32_t *number_of_entries_ptr = reinterpret_cast<uint32_t *>(buffer.data());
+    *number_of_entries_ptr += 1;
+
+    uint32_t *entry_data_ptr = reinterpret_cast<uint32_t *>(buffer.data() + old_size);
+
+    entry_data_ptr[0] = *number_of_entries_ptr - 1; // entry id
+    entry_data_ptr[1] = 0;                          // entry size
+  }
+
+  model.add_plan_entry(metadata_buffer_id, 3, {200});
+
+  flatbuffers::FlatBufferBuilder fbb;
+  auto model_offset = circle::Model::Pack(fbb, model.model.get(), nullptr);
+  circle::FinishModelBuffer(fbb, model_offset);
+
+  auto model_ptr = circle::GetModel(fbb.GetBufferPointer());
+  luci::Importer import;
+
+  ASSERT_ANY_THROW(import.importModule(model_ptr));
+}
index 3e8c08bfaad7c9886033ef045600d5fc41274a91..acde823b1f85f126c2c3412d4102cc20562f1a38 100644 (file)
@@ -42,12 +42,14 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const
   const auto *options = args.op.builtin_options.AsCastOptions();
   if (options != nullptr)
   {
-    const auto &tensors = args.reader.tensors();
-    const circle::TensorT &output_tensor = *tensors[outputs[0]];
+    const auto tensors = args.reader.tensors();
+    const auto output_tensor = tensors[outputs[0]];
+    assert(output_tensor != nullptr);
     auto name = tensor_name(output_tensor);
 
-    const auto &tensor_in = tensors.at(inputs.at(0));
-    if (tensor_in->type != options->in_data_type)
+    const auto tensor_in = tensors.at(inputs.at(0));
+    assert(tensor_in != nullptr);
+    if (tensor_in->type() != options->in_data_type)
     {
       if (settings->get(luci::UserSettings::Key::DisableValidation))
       {
@@ -57,7 +59,7 @@ bool CircleCastGraphBuilder::validate(const ValidateArgs &args) const
         return false;
     }
     const auto &tensor_out = tensors.at(outputs[0]);
-    if (tensor_out->type != options->out_data_type)
+    if (tensor_out->type() != options->out_data_type)
     {
       if (settings->get(luci::UserSettings::Key::DisableValidation))
       {
index 11fbb4e54c425367f707afb362be4c3190f249fc..a4f190dd95c933ad493dd0380d3c7ac55cf0931b 100644 (file)
 namespace
 {
 
-std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
+std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect)
 {
   uint32_t seq = 0;
-  for (auto &v : vect)
+  for (const auto &v : vect)
   {
     if (seq)
       os << ", ";
@@ -46,7 +46,8 @@ std::ostream &operator<<(std::ostream &os, const std::vector<int32_t> &vect)
 using namespace luci;
 
 template <loco::DataType DT>
-void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements, CircleConst *const_node)
+void copy_data(const VectorWrapper<uint8_t> &raw_data, uint32_t num_elements,
+               CircleConst *const_node)
 {
   using T = typename loco::DataTypeImpl<DT>::Type;
 
@@ -67,8 +68,8 @@ void copy_data(const std::vector<uint8_t> &raw_data, uint32_t num_elements, Circ
 }
 
 template <>
-void copy_data<loco::DataType::STRING>(const std::vector<uint8_t> &raw_data, uint32_t num_elements,
-                                       CircleConst *const_node)
+void copy_data<loco::DataType::STRING>(const VectorWrapper<uint8_t> &raw_data,
+                                       uint32_t num_elements, CircleConst *const_node)
 {
   assert(const_node->sparsityparam() == nullptr);
 
@@ -106,17 +107,26 @@ void copy_data<loco::DataType::STRING>(const std::vector<uint8_t> &raw_data, uin
 namespace luci
 {
 
-CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_index)
+CircleNode *CircleConstNodeBuilder::build(TensorIndex tensor_index,
+                                          GraphBuilderContext *context) const
 {
+  assert(tensor_index >= 0);
   LOGGER(l);
 
   auto graph = context->graph();
   auto reader = context->reader();
-  const auto &tensors = reader->tensors();
-  const circle::TensorT &const_tensor = *tensors[tensor_index];
+  const auto tensors = reader->tensors();
+  const auto const_tensor = tensors[tensor_index];
+  assert(const_tensor != nullptr);
+  if (const_tensor->is_variable())
+  {
+    // Create CircleVariable for variable
+    return nullptr;
+  }
 
-  const std::vector<uint8_t> &buffer = reader->buffers()[const_tensor.buffer]->data;
-  std::vector<int32_t> const_dims = const_tensor.shape; // in NHWC
+  assert(reader->buffers()[const_tensor->buffer()] != nullptr);
+  const auto buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+  const auto const_dims = wrap(const_tensor->shape()); // in NHWC
   if (const_dims.size() == 0 && buffer.empty())
   {
     // unknown shape tensor and scalar tensor
@@ -150,7 +160,7 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
           << const_dims << std::endl;
   if (num_elements > 0)
   {
-    switch (luci_datatype(const_tensor.type))
+    switch (luci_datatype(const_tensor->type()))
     {
       case loco::DataType::FLOAT32:
         copy_data<loco::DataType::FLOAT32>(buffer, num_elements, const_node);
@@ -186,7 +196,7 @@ CircleConst *create_circleconst(GraphBuilderContext *context, int32_t tensor_ind
 
       default:
         throw oops::UserExn("Unsupported tensor type",
-                            circle::EnumNameTensorType(const_tensor.type));
+                            circle::EnumNameTensorType(const_tensor->type()));
     }
   }
 
index 01ac3e2a0050dd3d4ae1cee422f0a2a8df1df2ca..4e78d5fb725fd2b4a39da78ed168eade2502061c 100644 (file)
@@ -39,13 +39,15 @@ CircleNode *CircleCustomGraphBuilder::build_node(const BuildNodeArgs &bna) const
     node->inputs(idx, bna.input_nodes[idx]);
   }
 
-  const auto &opcodes = bna.context->reader()->opcodes();
+  const auto opcodes = bna.context->reader()->opcodes();
   const uint32_t opcode_index = bna.op.opcode_index;
-  const circle::OperatorCodeT &opcode = *opcodes[opcode_index];
+  const auto opcode = opcodes[opcode_index];
+  assert(opcode != nullptr);
 
   node->custom_options(
     std::vector<uint8_t>{bna.op.custom_options.begin(), bna.op.custom_options.end()});
-  node->custom_code(opcode.custom_code);
+  assert(opcode->custom_code() != nullptr);
+  node->custom_code(opcode->custom_code()->c_str());
 
   // NOTE Operator version of custom is always 1
 
index 49eb30a838a830c5bad5854f71c1a9f9d7dd30e3..83fc2e37d00751fc707e7240dfdb35e5e9944930 100644 (file)
@@ -34,9 +34,10 @@ bool CircleDepthToSpaceGraphBuilder::validate(const ValidateArgs &args) const
   const auto &outputs = args.op.outputs;
 
   const auto *options = args.op.builtin_options.AsDepthToSpaceOptions();
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
+  assert(tensors[outputs[0]] != nullptr && tensors[inputs.at(0)] != nullptr);
 
-  if (tensors[outputs[0]]->type != tensors[inputs.at(0)]->type)
+  if (tensors[outputs[0]]->type() != tensors[inputs.at(0)]->type())
   {
     return false;
   }
index 727487c6a28fece90e8c849a121c982bdde8f11f..a24e4160d6363543a8aa5fa10b69af30113c4ff8 100644 (file)
@@ -32,19 +32,21 @@ bool CircleDepthwiseConv2DGraphBuilder::validate(const ValidateArgs &args) const
   if (args.op.outputs.size() != 1)
     return false;
 
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
   // input shape
-  const auto &input = tensors.at(args.op.inputs.at(0));
-  const auto &input_shape = input->shape;
+  const auto input = tensors.at(args.op.inputs.at(0));
+  assert(input != nullptr);
+  const auto input_shape = wrap(input->shape());
 
   // input shape must be rank 4
   if (input_shape.size() != 4)
     return false;
 
   // filter shape
-  const auto &filter = tensors.at(args.op.inputs.at(1));
-  const auto &filter_shape = filter->shape;
+  const auto filter = tensors.at(args.op.inputs.at(1));
+  assert(filter != nullptr);
+  const auto filter_shape = wrap(filter->shape());
 
   // filter shape must be rank 4
   if (filter_shape.size() != 4)
index 41696a65a555d6a0a9829475f77723fd2749216e..e5d7a4c7ab1d0449cd43e0da085eeb470d4c1f35 100644 (file)
@@ -31,10 +31,11 @@ bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
 
-  switch (tensor->type)
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT64:
       break;
@@ -48,7 +49,8 @@ bool CircleEluGraphBuilder::validate(const ValidateArgs &args) const
       return false;
   }
 
-  if (tensors[outputs[0]]->type != tensor->type)
+  assert(tensors[outputs[0]] != nullptr);
+  if (tensors[outputs[0]]->type() != tensor->type())
     return false;
 
   return true;
index 4909692b43695fbf24f0e27df30f8faf379f3fd7..b326d9b5d2174ba428bb7d8a95f0163b9f3c91b6 100644 (file)
@@ -29,9 +29,10 @@ bool CircleEqualGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  return tensors[inputs.at(0)]->type == tensors[inputs.at(1)]->type;
+  assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+  return tensors[inputs.at(0)]->type() == tensors[inputs.at(1)]->type();
 }
 
 CircleNode *CircleEqualGraphBuilder::build_node(const circle::OperatorT &,
index 5bb7bb664a2224a2b683484aea422720df937161..82c26f0e5b72caec5a2882793e0c0d30318c8333 100644 (file)
@@ -30,9 +30,10 @@ bool CircleExpGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   // input type check
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT16:
     case circle::TensorType_FLOAT32:
index ee0fbdc7e8d3b813e783f3b5918c5814ec529b0f..67d9b7e9e7c1fa07021c49ccc6ac5c7f6a6fd68b 100644 (file)
@@ -29,9 +29,10 @@ bool CircleExpandDimsGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  return tensors[inputs.at(1)]->type == circle::TensorType_INT32;
+  assert(tensors[inputs.at(1)] != nullptr);
+  return tensors[inputs.at(1)]->type() == circle::TensorType_INT32;
 }
 
 CircleNode *CircleExpandDimsGraphBuilder::build_node(const circle::OperatorT &,
index ce329326a7fc4d65bed7827cb3e51071bcda7d91..67eeddf91aa17f2b5da6778d6de4bba4589474fc 100644 (file)
@@ -30,15 +30,18 @@ bool CircleFloorDivGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_in_0 = tensors.at(inputs.at(0));
-  const auto &tensor_in_1 = tensors.at(inputs.at(1));
-  const auto &tensor_out = tensors.at(outputs[0]);
-
-  if (tensor_in_0->type != tensor_in_1->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor_in_0 = tensors.at(inputs.at(0));
+  const auto tensor_in_1 = tensors.at(inputs.at(1));
+  const auto tensor_out = tensors.at(outputs[0]);
+  assert(tensor_in_0 != nullptr);
+  assert(tensor_in_1 != nullptr);
+  assert(tensor_out != nullptr);
+
+  if (tensor_in_0->type() != tensor_in_1->type())
     return false;
 
-  if (tensor_out->type != tensor_in_1->type)
+  if (tensor_out->type() != tensor_in_1->type())
   {
     return false;
   }
index d8420a43c575abb6de5a51e0aee663877edfbf78..d2a275b62cc80beb6f0af0a75d59bac06a2bfd89 100644 (file)
@@ -29,10 +29,11 @@ bool CircleFloorModGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_in_0 = tensors.at(inputs.at(0));
-  const auto &tensor_in_1 = tensors.at(inputs.at(1));
-  if (tensor_in_0->type != tensor_in_1->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor_in_0 = tensors.at(inputs.at(0));
+  const auto tensor_in_1 = tensors.at(inputs.at(1));
+  assert(tensor_in_0 != nullptr && tensor_in_1 != nullptr);
+  if (tensor_in_0->type() != tensor_in_1->type())
     return false;
 
   // TODO dtype check
index 58750d79a8cfdfc44de8de512197037ecf72708b..cc7be16936fa476b56268205b011f74b6ea0aa55 100644 (file)
@@ -42,6 +42,7 @@ CircleNode *CircleFullyConnectedGraphBuilder::build_node(const circle::OperatorT
   const auto *options = op.builtin_options.AsFullyConnectedOptions();
   node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
   node->weights_format(luci_weights_format(options->weights_format));
+  node->keep_num_dims(options->keep_num_dims);
 
   return node;
 }
index a4bb26a10bacb3e12d1af4c5dacc584cdf4b8685..d336878ad3221bdb8eea79a4bf8835dbf75fb582 100644 (file)
@@ -31,10 +31,11 @@ bool CircleGatherNdGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  auto &indices_tensor = args.reader.tensors()[inputs.at(1)];
+  auto indices_tensor = args.reader.tensors()[inputs.at(1)];
+  assert(indices_tensor != nullptr);
 
-  if (!(indices_tensor->type == circle::TensorType::TensorType_INT32 ||
-        indices_tensor->type == circle::TensorType::TensorType_INT64))
+  if (!(indices_tensor->type() == circle::TensorType::TensorType_INT32 ||
+        indices_tensor->type() == circle::TensorType::TensorType_INT64))
   {
     return false;
   }
index f9c00346c7fd01aa41296cf638f676a86c15a34e..7f031b0ba83c750cba8dbc022916776168750d2a 100644 (file)
@@ -37,17 +37,19 @@ bool CircleGreaterGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+  assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+  if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
     return false;
 
   // NOTE: real models do have output dtype NOT BOOL
-  if (tensors[outputs[0]]->type != circle::TensorType_BOOL)
+  assert(tensors[outputs[0]] != nullptr);
+  if (tensors[outputs[0]]->type() != circle::TensorType_BOOL)
   {
     if (settings->get(luci::UserSettings::Key::DisableValidation))
     {
-      const circle::TensorT &output_tensor = *tensors[outputs[0]];
+      const auto output_tensor = tensors[outputs[0]];
       auto name = tensor_name(output_tensor);
       WARN(l) << "Warning: import Greater(" << name << ") output dtype is not boolean";
     }
index e20038fd903ed1ded74b0a19a9505696054e489e..ac4ce62f537ebb0826fc1cdd99dc70678db51826 100644 (file)
@@ -30,14 +30,16 @@ bool CircleGreaterEqualGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+  assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+  if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
   {
     return false;
   }
 
-  return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+  assert(tensors[outputs[0]] != nullptr);
+  return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
 }
 
 CircleNode *CircleGreaterEqualGraphBuilder::build_node(const circle::OperatorT &,
index ffdbf0b791a23f0d043f3d4267fa1ccd14111bc4..e8a50ff326a2291569fae7e288a209e401dda6cc 100644 (file)
@@ -42,12 +42,13 @@ bool CircleIfGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   // input 0 should be BOOL type
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  if (tensor->type != circle::TensorType_BOOL)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  if (tensor->type() != circle::TensorType_BOOL)
     return false;
 
-  const auto &shape = tensor->shape;
+  const auto shape = wrap(tensor->shape());
   if (shape.size() != 1 && shape.size() != 0)
     return false;
 
index f9b99bebe8611e9e8aaf712dd6735b773e539460..5c5ae51e13698000904409c0d178cb4549ccdcc2 100644 (file)
@@ -30,10 +30,11 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
 
-  switch (tensor->type)
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT32:
     case circle::TensorType_FLOAT64:
@@ -48,12 +49,14 @@ bool CircleLessGraphBuilder::validate(const ValidateArgs &args) const
       return false;
   }
 
-  if (tensors[inputs.at(1)]->type != tensor->type)
+  assert(tensors[inputs.at(1)] != nullptr);
+  if (tensors[inputs.at(1)]->type() != tensor->type())
   {
     return false;
   }
 
-  return tensors[outputs[0]]->type == circle::TensorType_BOOL;
+  assert(tensors[outputs[0]] != nullptr);
+  return tensors[outputs[0]]->type() == circle::TensorType_BOOL;
 }
 
 CircleNode *CircleLessGraphBuilder::build_node(const circle::OperatorT &,
index bb17121370ea3b6d63cbde882d8e9cfc9763d902..8a2aea8db07b9f6ad8ee56d187348a7d40b3f890 100644 (file)
@@ -30,14 +30,16 @@ bool CircleLessEqualGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+  assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+  if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
   {
     return false;
   }
 
-  return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+  assert(tensors[outputs[0]] != nullptr);
+  return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
 }
 
 CircleNode *CircleLessEqualGraphBuilder::build_node(const circle::OperatorT &,
index 26b575070772c26a7adadaa0fcda8bbc6e0ffe7c..f419268295819d68eeb8d713887fd3bd3ed586ff 100644 (file)
@@ -32,9 +32,10 @@ bool CircleLogGraphBuilder::validate(const ValidateArgs &args) const
   // input type check
   // Must be one of bfloat16, half, float32, float64, complex64, complex128.
   // Currently circle supports half(float16), float32, float64, complex64.
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT16:
     case circle::TensorType_FLOAT32:
index b13fc2735f442c4a3177ba5b0b5c8ac454afbef9..b61fb6f3e3de561c55f3a1a217008cc88534b48a 100644 (file)
@@ -30,11 +30,12 @@ bool CircleLogicalAndGraphBuilder::validate(const ValidateArgs &args) const
 
   // Only BOOL type is allowed for inputs
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
   for (auto input : inputs)
   {
-    const auto &tensor = tensors.at(input);
-    if (tensor->type != circle::TensorType::TensorType_BOOL)
+    const auto tensor = tensors.at(input);
+    assert(tensor != nullptr);
+    if (tensor->type() != circle::TensorType::TensorType_BOOL)
       return false;
   }
 
index f682183496ffb60247005df14015913b9012efbf..43e9ed39f494046d5648380d6d38c411daf30c04 100644 (file)
@@ -30,9 +30,10 @@ bool CircleLogicalNotGraphBuilder::validate(const ValidateArgs &args) const
 
   // Only BOOL type is allowed for the input
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  if (tensor->type != circle::TensorType::TensorType_BOOL)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  if (tensor->type() != circle::TensorType::TensorType_BOOL)
     return false;
 
   return true;
index 8c9023dd3e2f8b1dde015af950f04cc268b96a93..6354e7dc10536e220c0df3db7b1230c663e85623 100644 (file)
@@ -30,11 +30,12 @@ bool CircleLogicalOrGraphBuilder::validate(const ValidateArgs &args) const
 
   // Only BOOL type is allowed for inputs
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
   for (auto input : inputs)
   {
-    const auto &tensor = tensors.at(input);
-    if (tensor->type != circle::TensorType::TensorType_BOOL)
+    const auto tensor = tensors.at(input);
+    assert(tensor != nullptr);
+    if (tensor->type() != circle::TensorType::TensorType_BOOL)
       return false;
   }
 
index 0f92a9bb47a3d229bafc7bdc2b69770408a82c48..b0d08e039b79079db9f70da60ea47d1fbfd07942 100644 (file)
@@ -30,8 +30,9 @@ bool CircleLogisticGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+  const auto tensors = args.reader.tensors();
+  assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+  if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
     return false;
 
   return true;
index 590a07f2dc6167c51b8c9666bb3a21695290a5ee..384b985865ff17684f8287505a18b75917ed53f4 100644 (file)
@@ -30,10 +30,11 @@ bool CircleMatrixDiagGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
 
-  if (tensors[outputs[0]]->type != tensor->type)
+  assert(tensors[outputs[0]] != nullptr && tensor != nullptr);
+  if (tensors[outputs[0]]->type() != tensor->type())
     return false;
 
   return true;
index edd7d2ae2214bb646ce5b80ff9a950629f5a148e..64870c0573f98b8d3a01674ae9b9a5b786056c9a 100644 (file)
@@ -30,10 +30,11 @@ bool CircleMatrixSetDiagGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
 
-  if (tensors[outputs[0]]->type != tensor->type)
+  assert(tensors[outputs[0]] != nullptr && tensor != nullptr);
+  if (tensors[outputs[0]]->type() != tensor->type())
     return false;
 
   return true;
index d3d69506be90f1da92ac092dd7498e5b3dd758c3..e86f2ba81226586ac9e1b0d4473f68970fbe7041 100644 (file)
@@ -35,20 +35,26 @@ bool CircleNonMaxSuppressionV4GraphBuilder::validate(const ValidateArgs &args) c
   if (outputs.size() != 2)
     return false;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &boxes_tensor = tensors.at(inputs[0]);
-  if (boxes_tensor->shape.size() != 2)
+  const auto tensors = args.reader.tensors();
+  const auto boxes_tensor = tensors.at(inputs[0]);
+  assert(boxes_tensor != nullptr);
+  const auto boxes_tensor_shape = wrap(boxes_tensor->shape());
+  if (boxes_tensor_shape.size() != 2)
     return false;
-  if (boxes_tensor->shape.at(1) != 4)
+  if (boxes_tensor_shape.at(1) != 4)
     return false;
-  if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+  assert(tensors.at(inputs[1]) != nullptr);
+  if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0))
     return false;
 
-  if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+  assert(tensors.at(inputs[2]) != nullptr);
+  if (tensors.at(inputs[2])->type() != circle::TensorType_INT32)
     return false;
-  if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+  assert(tensors.at(inputs[3]) != nullptr);
+  if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32)
     return false;
-  if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+  assert(tensors.at(inputs[4]) != nullptr);
+  if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32)
     return false;
 
   return true;
index d797d4cb79e415fcb5828da0669c65e76ac98f23..a60eed4e4ed5f0ee86925df576c7034cbe856931 100644 (file)
@@ -35,22 +35,29 @@ bool CircleNonMaxSuppressionV5GraphBuilder::validate(const ValidateArgs &args) c
   if (outputs.size() != 3)
     return false;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &boxes_tensor = tensors.at(inputs[0]);
-  if (boxes_tensor->shape.size() != 2)
+  const auto tensors = args.reader.tensors();
+  const auto boxes_tensor = tensors.at(inputs[0]);
+  assert(boxes_tensor != nullptr);
+  const auto boxes_tensor_shape = wrap(boxes_tensor->shape());
+  if (boxes_tensor_shape.size() != 2)
     return false;
-  if (boxes_tensor->shape.at(1) != 4)
+  if (boxes_tensor_shape.at(1) != 4)
     return false;
-  if (boxes_tensor->shape.at(0) != tensors.at(inputs[1])->shape.at(0))
+  assert(tensors.at(inputs[1]) != nullptr);
+  if (boxes_tensor_shape.at(0) != wrap(tensors.at(inputs[1])->shape()).at(0))
     return false;
 
-  if (tensors.at(inputs[2])->type != circle::TensorType_INT32)
+  assert(tensors.at(inputs[2]) != nullptr);
+  if (tensors.at(inputs[2])->type() != circle::TensorType_INT32)
     return false;
-  if (tensors.at(inputs[3])->type != circle::TensorType_FLOAT32)
+  assert(tensors.at(inputs[3]) != nullptr);
+  if (tensors.at(inputs[3])->type() != circle::TensorType_FLOAT32)
     return false;
-  if (tensors.at(inputs[4])->type != circle::TensorType_FLOAT32)
+  assert(tensors.at(inputs[4]) != nullptr);
+  if (tensors.at(inputs[4])->type() != circle::TensorType_FLOAT32)
     return false;
-  if (tensors.at(inputs[5])->type != circle::TensorType_FLOAT32)
+  assert(tensors.at(inputs[5]) != nullptr);
+  if (tensors.at(inputs[5])->type() != circle::TensorType_FLOAT32)
     return false;
 
   return true;
index a0b8f9e4f72245e8aaae4b903f2c8766ccfd3d0f..3f5c1e03365a1b69187d7d5475b80d08e0d5c351 100644 (file)
@@ -30,14 +30,16 @@ bool CircleNotEqualGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  if (tensors[inputs.at(0)]->type != tensors[inputs.at(1)]->type)
+  assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(1)] != nullptr);
+  if (tensors[inputs.at(0)]->type() != tensors[inputs.at(1)]->type())
   {
     return false;
   }
 
-  return tensors[outputs[0]]->type == circle::TensorType::TensorType_BOOL;
+  assert(tensors[outputs[0]] != nullptr);
+  return tensors[outputs[0]]->type() == circle::TensorType::TensorType_BOOL;
 }
 
 CircleNode *CircleNotEqualGraphBuilder::build_node(const circle::OperatorT &,
index 3952cc21a6ebee8ecf386648405c8ba780da6cd4..6e5f8e16f2d1ec80ccbb7f72a9fccc042a072660 100644 (file)
@@ -32,21 +32,25 @@ bool CircleOneHotGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto *options = args.op.builtin_options.AsOneHotOptions();
-  const auto &tensors = args.reader.tensors();
-  const auto &indices = tensors.at(inputs.at(0));
-  const auto &depth = tensors.at(inputs.at(1));
-  const auto &on_value = tensors.at(inputs.at(2));
-  const auto &off_value = tensors.at(inputs.at(3));
+  const auto tensors = args.reader.tensors();
+  const auto indices = tensors.at(inputs.at(0));
+  const auto depth = tensors.at(inputs.at(1));
+  const auto on_value = tensors.at(inputs.at(2));
+  const auto off_value = tensors.at(inputs.at(3));
+  assert(indices != nullptr);
+  assert(depth != nullptr);
+  assert(on_value != nullptr);
+  assert(off_value != nullptr);
 
-  if (options->axis < -1 || options->axis > static_cast<int32_t>(indices->shape.size()))
+  if (options->axis < -1 || options->axis > static_cast<int32_t>(wrap(indices->shape()).size()))
     return false;
-  if (depth->shape.size() != 0)
+  if (wrap(depth->shape()).size() != 0)
     return false;
-  if (on_value->shape.size() != 0)
+  if (wrap(on_value->shape()).size() != 0)
     return false;
-  if (off_value->shape.size() != 0)
+  if (wrap(off_value->shape()).size() != 0)
     return false;
-  if (on_value->type != off_value->type)
+  if (on_value->type() != off_value->type())
     return false;
 
   return true;
index 13205dd7a1c79a80c923bd83cd35c35180f11f27..ebe2368e0420e5dc35d5b609781451f5421c457f 100644 (file)
@@ -28,17 +28,20 @@ bool CircleReduceAnyGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_0 = tensors.at(inputs.at(0));
-  const auto &tensor_1 = tensors.at(inputs.at(1));
-  const auto &tensor_o = tensors.at(outputs[0]);
+  const auto tensors = args.reader.tensors();
+  const auto tensor_0 = tensors.at(inputs.at(0));
+  const auto tensor_1 = tensors.at(inputs.at(1));
+  const auto tensor_o = tensors.at(outputs[0]);
+  assert(tensor_0 != nullptr);
+  assert(tensor_1 != nullptr);
+  assert(tensor_o != nullptr);
 
-  if (tensor_0->type != circle::TensorType_BOOL)
+  if (tensor_0->type() != circle::TensorType_BOOL)
     return false;
-  if (tensor_o->type != circle::TensorType_BOOL)
+  if (tensor_o->type() != circle::TensorType_BOOL)
     return false;
 
-  switch (tensor_1->type)
+  switch (tensor_1->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
index 3549c1a1805fb5e912a8747929433646d44dbde8..3b874b7c97da6229d983bb2b353dca22c8ada2b1 100644 (file)
@@ -27,13 +27,14 @@ bool CircleReduceProdGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_1 = tensors.at(inputs.at(1));
+  const auto tensors = args.reader.tensors();
+  const auto tensor_1 = tensors.at(inputs.at(1));
+  assert(tensor_1 != nullptr);
 
   // TODO check input types
 
   // Check for reduction_indices types
-  switch (tensor_1->type)
+  switch (tensor_1->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
index 401dff0fcd16fc477f0a938f4b19bd15ea2bb1f4..3421620ce9ba8e9c63bb6a3af1315bceb5090290 100644 (file)
@@ -34,12 +34,13 @@ bool CircleReshapeGraphBuilder::validate(const ValidateArgs &args) const
   if (args.op.inputs.size() == 2)
   {
     const auto &inputs = args.op.inputs;
-    const auto &tensors = args.reader.tensors();
-    const auto &tensor_in = tensors.at(inputs.at(1));
+    const auto tensors = args.reader.tensors();
+    const auto tensor_in = tensors.at(inputs.at(1));
+    assert(tensor_in != nullptr);
 
     // NOTE fix this if there is any other case
     // TensorFlow lite and circle only supports S32
-    if (tensor_in->type != circle::TensorType::TensorType_INT32)
+    if (tensor_in->type() != circle::TensorType::TensorType_INT32)
       return false;
   }
 
index 2fbb7a87c566765ead40810f9483accb5b9fdddc..c9cc792bbb192307056a5897fda23a66e1948d39 100644 (file)
@@ -30,12 +30,15 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_in = tensors.at(inputs.at(0));
-  const auto &tensor_lengths = tensors.at(inputs.at(1));
-  const auto &tensor_out = tensors.at(outputs[0]);
+  const auto tensors = args.reader.tensors();
+  const auto tensor_in = tensors.at(inputs.at(0));
+  const auto tensor_lengths = tensors.at(inputs.at(1));
+  const auto tensor_out = tensors.at(outputs[0]);
+  assert(tensor_in != nullptr);
+  assert(tensor_lengths != nullptr);
+  assert(tensor_out != nullptr);
 
-  switch (tensor_lengths->type)
+  switch (tensor_lengths->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
@@ -44,7 +47,7 @@ bool CircleReverseSequenceGraphBuilder::validate(const ValidateArgs &args) const
       return false;
   }
 
-  if (tensor_in->type != tensor_out->type)
+  if (tensor_in->type() != tensor_out->type())
     return false;
 
   return true;
index ca76532010c52ea801eca8086a81dabcb965c995..c19a0fdd2e0a99c9dafec3d985ecc523a6cded59 100644 (file)
@@ -30,12 +30,15 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_in = tensors.at(inputs.at(0));
-  const auto &tensor_axis = tensors.at(inputs.at(1));
-  const auto &tensor_out = tensors.at(outputs[0]);
+  const auto tensors = args.reader.tensors();
+  const auto tensor_in = tensors.at(inputs.at(0));
+  const auto tensor_axis = tensors.at(inputs.at(1));
+  const auto tensor_out = tensors.at(outputs[0]);
+  assert(tensor_in != nullptr);
+  assert(tensor_axis != nullptr);
+  assert(tensor_out != nullptr);
 
-  switch (tensor_axis->type)
+  switch (tensor_axis->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
@@ -44,7 +47,7 @@ bool CircleReverseV2GraphBuilder::validate(const ValidateArgs &args) const
       return false;
   }
 
-  if (tensor_out->type != tensor_in->type)
+  if (tensor_out->type() != tensor_in->type())
     return false;
 
   return true;
index d13e0fafe2830ffaec72878044d610f7d2ab4d9d..08cfae6c2e8998485eab7517f905d92661e81407 100644 (file)
@@ -33,11 +33,13 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
   // Must be one of the following types
   // bfloat16, half (float16), float32, float64, complex64, complex128
   // Currently, circle supports float16, float32, complex64
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_in = tensors.at(inputs.at(0));
-  const auto &tensor_out = tensors.at(outputs[0]);
+  const auto tensors = args.reader.tensors();
+  const auto tensor_in = tensors.at(inputs.at(0));
+  const auto tensor_out = tensors.at(outputs[0]);
+  assert(tensor_in != nullptr);
+  assert(tensor_out != nullptr);
 
-  switch (tensor_in->type)
+  switch (tensor_in->type())
   {
     case circle::TensorType_FLOAT16:
     case circle::TensorType_FLOAT32:
@@ -49,7 +51,7 @@ bool CircleRoundGraphBuilder::validate(const ValidateArgs &args) const
       return false;
   }
 
-  if (tensor_out->type != tensor_in->type)
+  if (tensor_out->type() != tensor_in->type())
     return false;
 
   return true;
index a9ca90832c67cdc720232756fb46b291beaeb2b0..e3bc68f8b81d92f681217151b7935f7d446846cb 100644 (file)
@@ -32,9 +32,10 @@ bool CircleRsqrtGraphBuilder::validate(const ValidateArgs &args) const
   // Must be one of the following types
   // bfloat16, half (float16), float32, float64, complex64, complex128
   // Currently, circle supports float16, float32, complex64
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_UINT8:
     case circle::TensorType_INT16:
diff --git a/compiler/luci/import/src/Nodes/CircleSVDF.cpp b/compiler/luci/import/src/Nodes/CircleSVDF.cpp
new file mode 100644 (file)
index 0000000..83a0251
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleSVDF.h"
+
+#include <luci/IR/Nodes/CircleSVDF.h>
+
+#include <loco.h>
+
+namespace luci
+{
+
+bool CircleSVDFBuilder::validate(const ValidateArgs &args) const
+{
+  const auto &inputs = args.op.inputs;
+  if (!(inputs.size() == 4 || inputs.size() == 5))
+    return false;
+
+  return true;
+}
+
+CircleNode *CircleSVDFBuilder::build_node(const circle::OperatorT &op,
+                                          const std::vector<CircleNode *> &inputs,
+                                          loco::Graph *graph) const
+{
+  auto *node = graph->nodes()->create<CircleSVDF>();
+  node->input(inputs.at(0));
+  node->weight_feature(inputs.at(1));
+  node->weight_time(inputs.at(2));
+  if (inputs.size() == 4)
+  {
+    auto *bias = graph->nodes()->create<CircleOutputExclude>();
+    // CircleOutputExclude doesn't need a type, but since all nodes must have a type,
+    // a dummy type is inserted.
+    bias->dtype(inputs.at(0)->dtype());
+    node->bias(bias);
+
+    node->input_activation_state(inputs.at(3));
+  }
+  else
+  {
+    node->bias(inputs.at(3));
+    node->input_activation_state(inputs.at(4));
+  }
+
+  const auto *options = op.builtin_options.AsSVDFOptions();
+  node->svdf_rank(options->rank);
+  node->fusedActivationFunction(luci_actfunc(options->fused_activation_function));
+  node->asymmetric_quantize_inputs(options->asymmetric_quantize_inputs);
+
+  return node;
+}
+
+} // namespace luci
index f8c17511089a6c4e6f7c4fdfd2463973e09bf14d..ebe252527690b6f52e391fd1fa53115424c14a74 100644 (file)
@@ -30,14 +30,15 @@ bool CircleScatterNdGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   // indices must have the same type as shape
-  const auto &tensors = args.reader.tensors();
+  const auto tensors = args.reader.tensors();
 
-  if (tensors[inputs.at(0)]->type != tensors[inputs.at(2)]->type)
+  assert(tensors[inputs.at(0)] != nullptr && tensors[inputs.at(2)] != nullptr);
+  if (tensors[inputs.at(0)]->type() != tensors[inputs.at(2)]->type())
     return false;
 
   // indices must be either int32 or int64
-  if (tensors[inputs.at(0)]->type != circle::TensorType_INT32 &&
-      tensors[inputs.at(0)]->type != circle::TensorType_INT64)
+  if (tensors[inputs.at(0)]->type() != circle::TensorType_INT32 &&
+      tensors[inputs.at(0)]->type() != circle::TensorType_INT64)
     return false;
 
   return true;
index bfa333e8d7910a0e7b9051bd8e0595666e388d2d..01d1aab4488aeebfffc9bf8344734411610f6c47 100644 (file)
@@ -30,12 +30,15 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_in = tensors.at(inputs.at(0));
-  const auto &tensor_out = tensors.at(outputs[0]);
-  const auto &tensor_ids = tensors.at(inputs.at(1));
+  const auto tensors = args.reader.tensors();
+  const auto tensor_in = tensors.at(inputs.at(0));
+  const auto tensor_out = tensors.at(outputs[0]);
+  const auto tensor_ids = tensors.at(inputs.at(1));
+  assert(tensor_in != nullptr);
+  assert(tensor_out != nullptr);
+  assert(tensor_ids != nullptr);
 
-  switch (tensor_ids->type)
+  switch (tensor_ids->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
@@ -44,7 +47,7 @@ bool CircleSegmentSumGraphBuilder::validate(const ValidateArgs &args) const
       return false;
   }
 
-  if (tensor_out->type != tensor_in->type)
+  if (tensor_out->type() != tensor_in->type())
   {
     return false;
   }
index 36a5fa8a8dd84aa1e9709933caf1be158fdcfcea..002f62f6c9a6f5b12e6a951db5f1eddf2b593b61 100644 (file)
@@ -29,9 +29,10 @@ bool CircleSelectGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  if (tensor->type != circle::TensorType_BOOL)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  if (tensor->type() != circle::TensorType_BOOL)
     return false;
   // TODO check dtypes for input 1, 2
 
index 556c8fa338093dd69d6ce1aa30ad0c6b9bee8f18..062fdc143b0f0c3f78d6dfe07db9d414003ad15a 100644 (file)
@@ -29,14 +29,16 @@ bool CircleSelectV2GraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &condition = tensors.at(inputs.at(0));
-  if (condition->type != circle::TensorType_BOOL)
+  const auto tensors = args.reader.tensors();
+  const auto condition = tensors.at(inputs.at(0));
+  assert(condition != nullptr);
+  if (condition->type() != circle::TensorType_BOOL)
     return false;
 
-  const auto &t = tensors.at(inputs.at(1));
-  const auto &e = tensors.at(inputs.at(2));
-  if (t->type != e->type)
+  const auto t = tensors.at(inputs.at(1));
+  const auto e = tensors.at(inputs.at(2));
+  assert(t != nullptr && e != nullptr);
+  if (t->type() != e->type())
     return false;
 
   return true;
index 22f46112321a054652747a4322d17e9b38268a56..51ebf03555501c62c2ce983c3b087eab67f253b2 100644 (file)
@@ -30,9 +30,10 @@ bool CircleSinGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   // input type check
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT16:
     case circle::TensorType_FLOAT32:
index 7ff2b84e6f07864fe48548f52a5e23156e8f3f26..bec84b4c06e66d84a9aa419a17d9557a55ad4646 100644 (file)
@@ -29,13 +29,13 @@ bool CircleSquareGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  // Must be one of the following types
-  // bfloat16, half (float16), float32, float64, complex64, complex128
-  // Currently, circle supports float16, float32, complex64
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
+    case circle::TensorType_UINT8:
+    case circle::TensorType_INT16:
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
     case circle::TensorType_FLOAT16:
index 33440d5ab20a28c93bdc6ea7effd359dfe02a913..1983465d3e059439e26c281735aba863585757ab 100644 (file)
@@ -32,9 +32,10 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con
   const auto &outputs = args.op.outputs;
   // Inputs must be one of the following types
   // bfloat16, half(float16), float32, float64, int32, int64, complex64, complex128
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT16:
     case circle::TensorType_FLOAT32:
@@ -53,11 +54,13 @@ bool CircleSquaredDifferenceGraphBuilder::validate(const ValidateArgs &args) con
   }
 
   // Input types must match
-  if (tensors.at(inputs.at(0))->type != tensors.at(inputs.at(1))->type)
+  assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(inputs.at(1)) != nullptr);
+  if (tensors.at(inputs.at(0))->type() != tensors.at(inputs.at(1))->type())
     return false;
 
   // Input and output types must match
-  if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+  assert(tensors.at(outputs[0]) != nullptr);
+  if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
     return false;
 
   return true;
index 95625a0e448b15e4630edd743caae557235831fd..80a0e887fbb3326f111c11f170ba68c20515b81a 100644 (file)
@@ -30,8 +30,9 @@ bool CircleTanhGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+  const auto tensors = args.reader.tensors();
+  assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+  if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
     return false;
 
   return true;
index 6da44130c994dfe5ba404c89f369dcd004f15021..c41a6ba3f9fbf0985792eb37249a58ffded80f42 100644 (file)
@@ -32,9 +32,10 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
   auto outputs = args.op.outputs;
   // Multiples (inputs.at(1)) must be one of the following types
   // int32, int64
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(1));
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(1));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
@@ -44,7 +45,8 @@ bool CircleTileGraphBuilder::validate(const ValidateArgs &args) const
   }
 
   // Type of input and output must be the same
-  if (tensors.at(inputs.at(0))->type != tensors.at(outputs[0])->type)
+  assert(tensors.at(inputs.at(0)) != nullptr && tensors.at(outputs[0]) != nullptr);
+  if (tensors.at(inputs.at(0))->type() != tensors.at(outputs[0])->type())
     return false;
 
   return true;
index 49f8587989c90bc95fc9190322333ec0aa756def..9f91737385c823f5eac49236a5e180e1f718ad01 100644 (file)
@@ -35,9 +35,10 @@ bool CircleTopKV2GraphBuilder::validate(const ValidateArgs &args) const
   if (outputs.size() != 2)
     return false;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(1));
-  if (tensor->type != circle::TensorType_INT32)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(1));
+  assert(tensor != nullptr);
+  if (tensor->type() != circle::TensorType_INT32)
     return false;
 
   return true;
index 5a60e2f541b3bec757a2274dd2c5baa80600a345..041983dacc2b0d2793564df8bbbd3cf9e389458b 100644 (file)
@@ -31,11 +31,13 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
     return false;
 
   const auto &inputs = args.op.inputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &filter_tensor = tensors.at(inputs.at(1));
-  const auto &filter_shape = filter_tensor.get()->shape;
-  const auto &ifm_tensor = tensors.at(inputs.at(2));
-  const auto &ifm_shape = ifm_tensor.get()->shape;
+  const auto tensors = args.reader.tensors();
+  const auto filter_tensor = tensors.at(inputs.at(1));
+  assert(filter_tensor != nullptr);
+  const auto filter_shape = wrap(filter_tensor->shape());
+  const auto ifm_tensor = tensors.at(inputs.at(2));
+  assert(ifm_tensor != nullptr);
+  const auto ifm_shape = wrap(ifm_tensor->shape());
 
   // ifm and filters must be 4-D tensor
   if (ifm_shape.size() != 4)
@@ -45,7 +47,7 @@ bool CircleTransposeConvGraphBuilder::validate(const ValidateArgs &args) const
 
   // input shape : [batch, height, width, in_channels]
   // filters shape : [output_channels, height, weight, in_channels]
-  if (ifm_tensor.get()->shape.at(3) != filter_tensor.get()->shape.at(3))
+  if (ifm_shape.at(3) != filter_shape.at(3))
     return false;
 
   return true;
index 9bfc76b5722f33b781499de011b51063d131757c..6b340160952522530d829601526d237162d03e02 100644 (file)
@@ -46,8 +46,8 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
   {
     if (settings->get(luci::UserSettings::Key::DisableValidation))
     {
-      const auto &tensors = args.reader.tensors();
-      const circle::TensorT &output_tensor = *tensors[outputs[0]];
+      const auto tensors = args.reader.tensors();
+      const auto output_tensor = tensors[outputs[0]];
       auto name = tensor_name(output_tensor);
       WARN(l) << "Warning: import Unpack(" << name << ") 'num' is not same as outputs used";
     }
@@ -58,9 +58,10 @@ bool CircleUnpackGraphBuilder::validate(const ValidateArgs &args) const
   if (options->num < 0)
     return false;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-  const auto &shape = tensor->shape;
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  const auto shape = wrap(tensor->shape());
   auto shape_size = static_cast<int32_t>(shape.size());
   if (shape_size > 0)
   {
diff --git a/compiler/luci/import/src/Nodes/CircleVariable.cpp b/compiler/luci/import/src/Nodes/CircleVariable.cpp
new file mode 100644 (file)
index 0000000..23ae9e7
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Import/Nodes/CircleVariable.h"
+
+#include <luci/IR/Nodes/CircleVariable.h>
+#include <luci/Log.h>
+
+#include <cassert>
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::ostream &operator<<(std::ostream &os, const luci::VectorWrapper<int32_t> &vect)
+{
+  uint32_t seq = 0;
+  for (const auto &v : vect)
+  {
+    if (seq)
+      os << ", ";
+    os << v;
+    seq++;
+  }
+  return os;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleVariable *create_circlevariable(GraphBuilderContext *context, int32_t tensor_index)
+{
+  LOGGER(l);
+
+  auto graph = context->graph();
+  auto reader = context->reader();
+  const auto tensors = reader->tensors();
+  const auto variable_tensor = tensors[tensor_index];
+  assert(variable_tensor != nullptr);
+
+  if (not variable_tensor->is_variable())
+  {
+    // not a variable
+    return nullptr;
+  }
+  {
+    // check if there is no buffer as we don't support this for now
+    // TODO use buffer when this is enabled in Kernel
+    assert(reader->buffers()[variable_tensor->buffer()] != nullptr);
+    assert(reader->buffers()[variable_tensor->buffer()]->data() == nullptr);
+  }
+
+  auto variable_node = graph->nodes()->create<CircleVariable>();
+  copy_tensor_attributes(variable_tensor, variable_node);
+  variable_node->shape_status(luci::ShapeStatus::VALID);
+
+  INFO(l) << "[luci] NodeFinder variable node(" << tensor_index << ") -> " << variable_node << " "
+          << wrap(variable_tensor->shape()) << std::endl;
+
+  return variable_node;
+}
+
+} // namespace luci
index 8e4f1a0c4209dca66fc1070003397675b0e7d56f..bc6199acecd82954081e1cbd49cff3ed63f069b3 100644 (file)
@@ -30,14 +30,16 @@ bool CircleWhereGraphBuilder::validate(const ValidateArgs &args) const
 
   const auto &inputs = args.op.inputs;
   const auto &outputs = args.op.outputs;
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_condition = tensors.at(inputs.at(0));
-  const auto &tensor_out = tensors.at(outputs[0]);
+  const auto tensors = args.reader.tensors();
+  const auto tensor_condition = tensors.at(inputs.at(0));
+  const auto tensor_out = tensors.at(outputs[0]);
+  assert(tensor_condition != nullptr);
+  assert(tensor_out != nullptr);
 
-  if (tensor_condition->type != circle::TensorType_BOOL)
+  if (tensor_condition->type() != circle::TensorType_BOOL)
     return false;
 
-  if (tensor_out->type != circle::TensorType_INT64)
+  if (tensor_out->type() != circle::TensorType_INT64)
     return false;
 
   return true;
index 26147562f7cc0e0d144ae42b463a257fd1bc0ee0..27a392b2aa5e16528b54a4fb8199015a42f813ca 100644 (file)
@@ -67,8 +67,8 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
 
   const std::vector<int32_t> &inputs = op.inputs;
   const std::vector<int32_t> &outputs = op.outputs;
-  const auto &tensors = context->reader()->tensors();
-  const auto &opcodes = context->reader()->opcodes();
+  const auto tensors = context->reader()->tensors();
+  const auto opcodes = context->reader()->opcodes();
 
   std::vector<CircleNode *> input_nodes;
   for (const int32_t input_tensor_index : inputs)
@@ -96,9 +96,11 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
   assert(outputs.size() > 0);
   {
     // Lets use name of output 0 as While name
-    const circle::TensorT &output_tensor = *tensors[outputs[0]];
+    const auto output_tensor = tensors[outputs[0]];
+    assert(output_tensor != nullptr);
     node->name(tensor_name(output_tensor));
-    node->op_version(opcodes[op.opcode_index].get()->version);
+    assert(opcodes[op.opcode_index] != nullptr);
+    node->op_version(opcodes[op.opcode_index]->version());
 
     // NOTE We don't set quantization for While itself but to virtual outputs
   }
@@ -106,7 +108,8 @@ CircleNode *CircleWhileGraphBuilder::build(const circle::OperatorT &op,
   // Create virtual outputs of While
   for (uint32_t n = 0; n < output_count; ++n)
   {
-    const circle::TensorT &output_tensor = *tensors[outputs[n]];
+    const auto output_tensor = tensors[outputs[n]];
+    assert(output_tensor != nullptr);
 
     auto *nodeout = graph->nodes()->create<CircleWhileOut>();
 
index 27306ba90d67ce3757ac595dbc20f55bd90bb11e..fc027704bb8d8f9118f6371ea7e484b52fd7935f 100644 (file)
@@ -26,9 +26,10 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
     return false;
 
   // input 1 and 2 should have INT32/INT64 type
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_1 = tensors.at(inputs.at(1));
-  switch (tensor_1->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor_1 = tensors.at(inputs.at(1));
+  assert(tensor_1 != nullptr);
+  switch (tensor_1->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
@@ -36,8 +37,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
     default:
       return false;
   }
-  const auto &tensor_2 = tensors.at(inputs.at(2));
-  switch (tensor_2->type)
+  const auto tensor_2 = tensors.at(inputs.at(2));
+  assert(tensor_2 != nullptr);
+  switch (tensor_2->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
@@ -47,8 +49,9 @@ bool validate_batch_space_nd(const GraphBuilderBase::ValidateArgs &args)
   }
 
   // Only support input shape dimension 3 and 4 only
-  const auto &tensor_0 = tensors.at(inputs.at(0));
-  const auto t_0_s = tensor_0->shape.size();
+  const auto tensor_0 = tensors.at(inputs.at(0));
+  assert(tensor_0 != nullptr);
+  const auto t_0_s = wrap(tensor_0->shape()).size();
   if (t_0_s != 3 && t_0_s != 4)
     return false;
 
@@ -68,10 +71,10 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
   if (outputs.size() != 1)
     return false;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor = tensors.at(inputs.at(0));
-
-  switch (tensor->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor = tensors.at(inputs.at(0));
+  assert(tensor != nullptr);
+  switch (tensor->type())
   {
     case circle::TensorType_FLOAT16:
     case circle::TensorType_FLOAT32:
@@ -84,10 +87,12 @@ bool validate_minmax(const GraphBuilderBase::ValidateArgs &args)
       return false;
   }
 
-  if (tensors[inputs.at(1)]->type != tensor->type)
+  assert(tensors[inputs.at(1)] != nullptr);
+  if (tensors[inputs.at(1)]->type() != tensor->type())
     return false;
 
-  if (tensors[outputs[0]]->type != tensor->type)
+  assert(tensors[outputs[0]] != nullptr);
+  if (tensors[outputs[0]]->type() != tensor->type())
     return false;
 
   return true;
@@ -104,10 +109,10 @@ bool validate_reduce_minmax(const GraphBuilderBase::ValidateArgs &args)
   if (outputs.size() != 1)
     return false;
 
-  const auto &tensors = args.reader.tensors();
-  const auto &tensor_axis = tensors.at(inputs.at(1));
-
-  switch (tensor_axis->type)
+  const auto tensors = args.reader.tensors();
+  const auto tensor_axis = tensors.at(inputs.at(1));
+  assert(tensor_axis != nullptr);
+  switch (tensor_axis->type())
   {
     case circle::TensorType_INT32:
     case circle::TensorType_INT64:
index a313f9d5b46e4f71045762a6ba47fc651a63a688..d89ea03cc8888614c9947f5e28f5956875456fbc 100644 (file)
@@ -29,7 +29,6 @@
 #include "Nodes/CircleCast.h"
 #include "Nodes/CircleCeil.h"
 #include "Nodes/CircleConcatenation.h"
-#include "Nodes/CircleConst.h"
 #include "Nodes/CircleConv2D.h"
 #include "Nodes/CircleCos.h"
 #include "Nodes/CircleCustom.h"
 #include "Nodes/CircleStridedSlice.h"
 #include "Nodes/CircleSub.h"
 #include "Nodes/CircleSum.h"
+#include "Nodes/CircleSVDF.h"
 #include "Nodes/CircleTanh.h"
 #include "Nodes/CircleTile.h"
 #include "Nodes/CircleTopKV2.h"
 #include "Nodes/CircleBCQGather.h"
 #include "Nodes/CircleInstanceNorm.h"
 // Virtual nodes
+#include "Nodes/CircleConst.h"
 #include "Nodes/CircleInput.h"
 #include "Nodes/CircleOutput.h"
+#include "Nodes/CircleVariable.h"
+// Multi-output virtual nodes
 #include "Nodes/CircleBidirectionalSequenceLSTMOut.h"
 #include "Nodes/CircleCustomOut.h"
 #include "Nodes/CircleIfOut.h"
 #include "Nodes/CircleNonMaxSuppressionV4Out.h"
 #include "Nodes/CircleNonMaxSuppressionV5Out.h"
-#include "Nodes/CircleUnpackOut.h"
-#include "Nodes/CircleUniqueOut.h"
 #include "Nodes/CircleSplitOut.h"
 #include "Nodes/CircleSplitVOut.h"
 #include "Nodes/CircleTopKV2Out.h"
+#include "Nodes/CircleUniqueOut.h"
+#include "Nodes/CircleUnpackOut.h"
 #include "Nodes/CircleWhileOut.h"
 
 #include <loco/IR/Graph.h>
index 914aa16e4e4b7760239b5a34e483dd78d0276616..1472008dfd8926ae32c179d4c44946907b483ea6 100644 (file)
@@ -116,6 +116,7 @@ CIRCLE_NODE(SQUEEZE, CircleSqueeze)
 CIRCLE_NODE(STRIDED_SLICE, CircleStridedSlice)
 CIRCLE_NODE(SUB, CircleSub)
 CIRCLE_NODE(SUM, CircleSum)
+CIRCLE_NODE(SVDF, CircleSVDF)
 CIRCLE_NODE(TANH, CircleTanh)
 CIRCLE_NODE(TILE, CircleTile)
 CIRCLE_NODE(TOPK_V2, CircleTopKV2)
@@ -132,12 +133,14 @@ CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnected)
 CIRCLE_NODE(BCQ_GATHER, CircleBCQGather)
 CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNorm)
 // Virtual node(s)
-CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, CircleBidirectionalSequenceLSTMOut)
 CIRCLE_VNODE(CIRCLECONST, CircleConst)
 CIRCLE_VNODE(CIRCLEINPUT, CircleInput)
 CIRCLE_VNODE(CIRCLEOUTPUT, CircleOutput)
 CIRCLE_VNODE(CIRCLEOUTPUTDUMMY, CircleOutputDummy)
 CIRCLE_VNODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExclude)
+CIRCLE_VNODE(CIRCLEVARIABLE, CircleVariable)
+// Multi-output virtual nodes
+CIRCLE_VNODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT, CircleBidirectionalSequenceLSTMOut)
 CIRCLE_VNODE(CIRCLECUSTOMOUT, CircleCustomOut)
 CIRCLE_VNODE(CIRCLEIFOUT, CircleIfOut)
 CIRCLE_VNODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4Out)
index 694437303b314e79bc877f0df028d4ded5b71afa..8afc80a76d16df287f276176f0ef1464b207729c 100644 (file)
@@ -32,6 +32,10 @@ struct CircleQuantParam
   int32_t quantized_dimension{0};
 };
 
+struct CircleNode;
+
+void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst);
+
 } // namespace luci
 
 #endif // __LUCI_IR_CIRCLEQUANTPARAM_H__
index 2862cadb2cb52c3044738580d05f347460154266..dc5aeb267d61d3a3417eba398c1de4f0df4d4ffd 100644 (file)
@@ -58,8 +58,12 @@ public:
   WeightsFormat weights_format(void) const { return _weights_format; }
   void weights_format(WeightsFormat weights_format) { _weights_format = weights_format; }
 
+  bool keep_num_dims(void) const { return _keep_num_dims; }
+  void keep_num_dims(bool keep_num_dims) { _keep_num_dims = keep_num_dims; }
+
 private:
   WeightsFormat _weights_format{WeightsFormat::DEFAULT};
+  bool _keep_num_dims{false};
 };
 
 } // namespace luci
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleSVDF.h
new file mode 100644 (file)
index 0000000..839d11e
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_SVDF_H__
+#define __LUCI_IR_CIRCLE_SVDF_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/LuciNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief SVDF in Circle
+ */
+class CircleSVDF final : public FixedArityNode<5, CircleNodeImpl<CircleOpcode::SVDF>>,
+                         public CircleNodeMixin<CircleNodeTrait::FusedActFunc>
+{
+public:
+  CircleSVDF() = default;
+
+public:
+  loco::Node *input(void) const { return at(0)->node(); }
+  void input(loco::Node *node) { at(0)->node(node); }
+
+  loco::Node *weight_feature(void) const { return at(1)->node(); }
+  void weight_feature(loco::Node *node) { at(1)->node(node); }
+
+  loco::Node *weight_time(void) const { return at(2)->node(); }
+  void weight_time(loco::Node *node) { at(2)->node(node); }
+
+  loco::Node *bias(void) const { return at(3)->node(); }
+  void bias(loco::Node *node) { at(3)->node(node); }
+
+  loco::Node *input_activation_state(void) const { return at(4)->node(); }
+  void input_activation_state(loco::Node *node) { at(4)->node(node); }
+
+public:
+  bool asymmetric_quantize_inputs() const { return _asymmetric_quantize_inputs; }
+  void asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    _asymmetric_quantize_inputs = asymmetric_quantize_inputs;
+  }
+
+  int32_t svdf_rank() const { return _rank; }
+  void svdf_rank(int32_t svdf_rank) { _rank = svdf_rank; }
+
+private:
+  bool _asymmetric_quantize_inputs = false;
+  int32_t _rank = 0;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_SVDF_H__
diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleVariable.h
new file mode 100644 (file)
index 0000000..8c15b66
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_IR_CIRCLE_VARIABLE_H__
+#define __LUCI_IR_CIRCLE_VARIABLE_H__
+
+#include "luci/IR/CircleNodeDecl.h"
+#include "luci/IR/CircleOpcode.h"
+
+#include "luci/IR/CircleNodeMixins.h"
+
+namespace luci
+{
+
+/**
+ * @brief Virtual CircleVariable in Circle for 'variable' Tensor
+ */
+class CircleVariable final : public FixedArityNode<0, CircleNodeImpl<CircleOpcode::CIRCLEVARIABLE>>
+{
+public:
+  CircleVariable() = default;
+};
+
+} // namespace luci
+
+#endif // __LUCI_IR_CIRCLE_VARIABLE_H__
diff --git a/compiler/luci/lang/src/CircleQuantParam.cpp b/compiler/luci/lang/src/CircleQuantParam.cpp
new file mode 100644 (file)
index 0000000..89671d3
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/CircleQuantParam.h"
+#include "luci/IR/CircleNode.h"
+
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @brief copy CircleQuantParam of src to dst
+ */
+void copy_quantparam(const luci::CircleNode *src, luci::CircleNode *dst)
+{
+  auto q = src->quantparam();
+  if (q == nullptr)
+    dst->quantparam(nullptr);
+  else
+  {
+    auto qparam = std::make_unique<luci::CircleQuantParam>();
+    qparam->scale = q->scale;
+    qparam->zerop = q->zerop;
+    qparam->min = q->min;
+    qparam->max = q->max;
+    qparam->quantized_dimension = q->quantized_dimension;
+
+    dst->quantparam(std::move(qparam));
+  }
+}
+
+} // namespace luci
diff --git a/compiler/luci/lang/src/CircleQuantParam.test.cpp b/compiler/luci/lang/src/CircleQuantParam.test.cpp
new file mode 100644 (file)
index 0000000..520ca05
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE any node will do for testing
+#include "luci/IR/Nodes/CircleAdd.h"
+
+#include <loco/IR/Graph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+luci::CircleAdd *build_simple_add_graph(loco::Graph *g)
+{
+  auto node = g->nodes()->create<luci::CircleAdd>();
+
+  node->name("name");
+  node->dtype(loco::DataType::FLOAT32);
+  node->rank(1);
+  node->dim(0).set(3);
+  node->shape_status(luci::ShapeStatus::VALID);
+  node->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+  auto qparam = std::make_unique<luci::CircleQuantParam>();
+  qparam->scale = {1.0};
+  qparam->zerop = {0};
+  qparam->min = {0.0};
+  qparam->max = {1.0};
+  qparam->quantized_dimension = 0;
+  node->quantparam(std::move(qparam));
+
+  return node;
+}
+
+} // namespace
+
+TEST(CircleNodeCloneTest, copy_quantparam)
+{
+  auto g = loco::make_graph();
+  auto node = build_simple_add_graph(g.get());
+
+  auto copy = g->nodes()->create<luci::CircleAdd>();
+  luci::copy_quantparam(node, copy);
+
+  const auto *qparam_node = node->quantparam();
+  const auto *qparam_copy = copy->quantparam();
+  ASSERT_EQ(qparam_node->scale, qparam_copy->scale);
+  ASSERT_EQ(qparam_node->zerop, qparam_copy->zerop);
+  ASSERT_EQ(qparam_node->quantized_dimension, qparam_copy->quantized_dimension);
+}
+
+TEST(CircleNodeCloneTest, copy_quantparam_NEG)
+{
+  auto g = loco::make_graph();
+  auto node = build_simple_add_graph(g.get());
+
+  node->quantparam(nullptr);
+
+  auto copy = g->nodes()->create<luci::CircleAdd>();
+  luci::copy_quantparam(node, copy);
+
+  const auto *qparam_copy = copy->quantparam();
+  ASSERT_EQ(qparam_copy, nullptr);
+}
index bb0e3c51b078edfbb306584872239c7cd531f6b0..15a78008516b71e34736a4a71adb6c80f02fdaca 100644 (file)
@@ -32,6 +32,7 @@ TEST(CircleFullyConnectedTest, constructor)
   ASSERT_EQ(nullptr, fc_node.weights());
   ASSERT_EQ(nullptr, fc_node.bias());
   ASSERT_EQ(luci::FusedActFunc::UNDEFINED, fc_node.fusedActivationFunction());
+  ASSERT_EQ(false, fc_node.keep_num_dims());
 }
 
 TEST(CircleFullyConnectedTest, input_NEG)
diff --git a/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/lang/src/Nodes/CircleSVDF.test.cpp
new file mode 100644 (file)
index 0000000..833ae07
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleSVDF.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleSVDFTest, constructor)
+{
+  luci::CircleSVDF svdf_node;
+
+  ASSERT_EQ(luci::CircleDialect::get(), svdf_node.dialect());
+  ASSERT_EQ(luci::CircleOpcode::SVDF, svdf_node.opcode());
+
+  ASSERT_EQ(nullptr, svdf_node.input());
+  ASSERT_EQ(nullptr, svdf_node.weight_feature());
+  ASSERT_EQ(nullptr, svdf_node.weight_time());
+  ASSERT_EQ(nullptr, svdf_node.bias());
+  ASSERT_EQ(nullptr, svdf_node.input_activation_state());
+
+  ASSERT_EQ(false, svdf_node.asymmetric_quantize_inputs());
+  ASSERT_EQ(0, svdf_node.svdf_rank());
+}
+
+TEST(CircleSVDFTest, input_NEG)
+{
+  luci::CircleSVDF svdf_node;
+  luci::CircleSVDF node;
+
+  svdf_node.input(&node);
+  svdf_node.weight_feature(&node);
+  svdf_node.weight_time(&node);
+  svdf_node.bias(&node);
+  svdf_node.input_activation_state(&node);
+
+  ASSERT_NE(nullptr, svdf_node.input());
+  ASSERT_NE(nullptr, svdf_node.weight_feature());
+  ASSERT_NE(nullptr, svdf_node.weight_time());
+  ASSERT_NE(nullptr, svdf_node.bias());
+  ASSERT_NE(nullptr, svdf_node.input_activation_state());
+
+  svdf_node.input(nullptr);
+  svdf_node.weight_feature(nullptr);
+  svdf_node.weight_time(nullptr);
+  svdf_node.bias(nullptr);
+  svdf_node.input_activation_state(nullptr);
+
+  ASSERT_EQ(nullptr, svdf_node.input());
+  ASSERT_EQ(nullptr, svdf_node.weight_feature());
+  ASSERT_EQ(nullptr, svdf_node.weight_time());
+  ASSERT_EQ(nullptr, svdf_node.bias());
+  ASSERT_EQ(nullptr, svdf_node.input_activation_state());
+}
+
+TEST(CircleSVDFTest, arity_NEG)
+{
+  luci::CircleSVDF svdf_node;
+
+  ASSERT_NO_THROW(svdf_node.arg(4));
+  ASSERT_THROW(svdf_node.arg(5), std::out_of_range);
+}
+
+TEST(CircleSVDFTest, visit_mutable_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+  {
+  };
+
+  luci::CircleSVDF svdf_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(svdf_node.accept(&tv), std::exception);
+}
+
+TEST(CircleSVDFTest, visit_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeVisitor<void>
+  {
+  };
+
+  luci::CircleSVDF svdf_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(svdf_node.accept(&tv), std::exception);
+}
diff --git a/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp b/compiler/luci/lang/src/Nodes/CircleVariable.test.cpp
new file mode 100644 (file)
index 0000000..e1864f8
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/IR/Nodes/CircleVariable.h"
+
+#include "luci/IR/CircleDialect.h"
+#include "luci/IR/CircleNodeVisitor.h"
+
+#include <gtest/gtest.h>
+
+TEST(CircleVariableTest, constructor)
+{
+  luci::CircleVariable var_node;
+
+  ASSERT_EQ(luci::CircleDialect::get(), var_node.dialect());
+  ASSERT_EQ(luci::CircleOpcode::CIRCLEVARIABLE, var_node.opcode());
+}
+
+TEST(CircleVariableTest, arity_NEG)
+{
+  luci::CircleVariable var_node;
+
+  ASSERT_THROW(var_node.arg(0), std::out_of_range);
+}
+
+TEST(CircleVariableTest, visit_mutable_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeMutableVisitor<void>
+  {
+  };
+
+  luci::CircleVariable var_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(var_node.accept(&tv), std::exception);
+}
+
+TEST(CircleVariableTest, visit_NEG)
+{
+  struct TestVisitor final : public luci::CircleNodeVisitor<void>
+  {
+  };
+
+  luci::CircleVariable var_node;
+
+  TestVisitor tv;
+  ASSERT_THROW(var_node.accept(&tv), std::exception);
+}
index aed9fb79b326c297ce72636b469ba258d79d9668..b8a2111ddded5c2d1cf25fcfdcf7642f8552426c 100644 (file)
@@ -1,5 +1,7 @@
 # TODO Find how to test logging-ex utility
 file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
 
 if (NOT LUCI_LIBRARY_TYPE)
     set(LUCI_LIBRARY_TYPE "SHARED")
@@ -13,7 +15,17 @@ target_link_libraries(luci_logex PRIVATE luci_log)
 target_link_libraries(luci_logex PRIVATE luci_lang)
 target_link_libraries(luci_logex PRIVATE hermes_std)
 target_link_libraries(luci_logex PRIVATE nncc_common)
-target_link_libraries(luci_logex PRIVATE pepper_str)
 install(TARGETS luci_logex DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+    return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(luci_logex_test ${TESTS})
+target_include_directories(luci_logex_test PRIVATE src)
+target_link_libraries(luci_logex_test luci_logex)
+target_link_libraries(luci_logex_test luci_lang)
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.cpp
new file mode 100644 (file)
index 0000000..eff0830
--- /dev/null
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilder.h"
+#include "CircleNodeSummaryBuilders.h"
+
+#include <luci/IR/CircleDialect.h>
+
+#include <memory>
+
+namespace
+{
+
+std::string circle_opname(luci::CircleOpcode opcode)
+{
+  static const std::string prefix{"circle."};
+
+  switch (opcode)
+  {
+#define CIRCLE_NODE(OPCODE, CLASS) \
+  case luci::CircleOpcode::OPCODE: \
+    return prefix + #OPCODE;
+#define CIRCLE_VNODE CIRCLE_NODE
+#include <luci/IR/CircleNodes.lst>
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
+    default:
+      break;
+  };
+
+  return prefix + "Invalid";
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool CircleNodeSummaryBuilder::build(const loco::Node *node, const locop::SymbolTable *tbl,
+                                     locop::NodeSummary &s)
+{
+  if (node->dialect() != luci::CircleDialect::get())
+    return false;
+
+  auto ptr_to_str = [](const void *ptr) {
+    std::stringstream ss;
+    ss << ptr;
+    return ss.str();
+  };
+
+  auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+  if (const auto builder = create_builder(circle_node))
+  {
+    if (!builder->validate(circle_node))
+    {
+      s.state(locop::NodeDesc::State::Invalid);
+      return false;
+    }
+
+    auto input_names = builder->get_input_names(circle_node);
+    assert(node->arity() == input_names.size());
+    for (uint32_t i = 0; i < node->arity(); ++i)
+      s.args().append(input_names.at(i), tbl->lookup(node->arg(i)));
+
+    builder->build_attributes(circle_node, s);
+    builder->update_status(s);
+
+    s.opname(circle_opname(circle_node->opcode()));
+    s.comments().append("[" + circle_node->name() + "] = " + ptr_to_str(node));
+
+    return true;
+  }
+  else
+  {
+    // When SummaryBuilder is not implemented, return false
+    return false;
+  }
+}
+
+bool CircleNodeSummaryBuilder::validate(const luci::CircleNode *) { return true; }
+
+std::vector<std::string> CircleNodeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  // Return empty names for default
+  return std::vector<std::string>();
+}
+
+void CircleNodeSummaryBuilder::build_attributes(const luci::CircleNode *, locop::NodeSummary &)
+{
+  // Do nothing for default
+}
+
+void CircleNodeSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+  s.state(locop::NodeDesc::State::Complete);
+}
+
+std::unique_ptr<CircleNodeSummaryBuilder>
+CircleNodeSummaryBuilder::create_builder(const luci::CircleNode *node)
+{
+  switch (node->opcode())
+  {
+#define CIRCLE_NODE(OPCODE, CLASS)    \
+  case luci::CircleOpcode::OPCODE:    \
+  {                                   \
+    return std::make_unique<CLASS>(); \
+  }
+
+    CIRCLE_NODE(ABS, CircleAbsSummaryBuilder)
+    CIRCLE_NODE(ADD, CircleAddSummaryBuilder)
+    CIRCLE_NODE(ADD_N, CircleAddNSummaryBuilder)
+    CIRCLE_NODE(ARG_MAX, CircleArgMaxSummaryBuilder)
+    CIRCLE_NODE(ARG_MIN, CircleArgMinSummaryBuilder)
+    CIRCLE_NODE(AVERAGE_POOL_2D, CircleAveragePool2DSummaryBuilder)
+    CIRCLE_NODE(BATCH_MATMUL, CircleBatchMatMulSummaryBuilder)
+    CIRCLE_NODE(BATCH_TO_SPACE_ND, CircleBatchToSpaceNDSummaryBuilder)
+    CIRCLE_NODE(BCQ_FULLY_CONNECTED, CircleBCQFullyConnectedSummaryBuilder)
+    CIRCLE_NODE(BCQ_GATHER, CircleBCQGatherSummaryBuilder)
+    CIRCLE_NODE(BIDIRECTIONAL_SEQUENCE_LSTM, CircleBidirectionalSequenceLSTMSummaryBuilder)
+    CIRCLE_NODE(CAST, CircleCastSummaryBuilder)
+    CIRCLE_NODE(CEIL, CircleCeilSummaryBuilder)
+    CIRCLE_NODE(CONCATENATION, CircleConcatenationSummaryBuilder)
+    CIRCLE_NODE(CIRCLECONST, CircleConstSummaryBuilder)
+    CIRCLE_NODE(CONV_2D, CircleConv2DSummaryBuilder)
+    CIRCLE_NODE(COS, CircleCosSummaryBuilder)
+    CIRCLE_NODE(CUSTOM, CircleCustomSummaryBuilder)
+    CIRCLE_NODE(DEPTH_TO_SPACE, CircleDepthToSpaceSummaryBuilder)
+    CIRCLE_NODE(DEPTHWISE_CONV_2D, CircleDepthwiseConv2DSummaryBuilder)
+    CIRCLE_NODE(DEQUANTIZE, CircleDequantizeSummaryBuilder)
+    CIRCLE_NODE(DIV, CircleDivSummaryBuilder)
+    CIRCLE_NODE(ELU, CircleEluSummaryBuilder)
+    CIRCLE_NODE(EQUAL, CircleEqualSummaryBuilder)
+    CIRCLE_NODE(EXP, CircleExpSummaryBuilder)
+    CIRCLE_NODE(EXPAND_DIMS, CircleExpandDimsSummaryBuilder)
+    CIRCLE_NODE(FAKE_QUANT, CircleFakeQuantSummaryBuilder)
+    CIRCLE_NODE(FILL, CircleFillSummaryBuilder)
+    CIRCLE_NODE(FLOOR, CircleFloorSummaryBuilder)
+    CIRCLE_NODE(FLOOR_DIV, CircleFloorDivSummaryBuilder)
+    CIRCLE_NODE(FLOOR_MOD, CircleFloorModSummaryBuilder)
+    CIRCLE_NODE(FULLY_CONNECTED, CircleFullyConnectedSummaryBuilder)
+    CIRCLE_NODE(GATHER, CircleGatherSummaryBuilder)
+    CIRCLE_NODE(GATHER_ND, CircleGatherNdSummaryBuilder)
+    CIRCLE_NODE(GREATER, CircleGreaterSummaryBuilder)
+    CIRCLE_NODE(GREATER_EQUAL, CircleGreaterEqualSummaryBuilder)
+    CIRCLE_NODE(IF, CircleIfSummaryBuilder)
+    CIRCLE_NODE(INSTANCE_NORM, CircleInstanceNormSummaryBuilder)
+    CIRCLE_NODE(L2_NORMALIZATION, CircleL2NormalizeSummaryBuilder)
+    CIRCLE_NODE(L2_POOL_2D, CircleL2Pool2DSummaryBuilder)
+    CIRCLE_NODE(LEAKY_RELU, CircleLeakyReluSummaryBuilder)
+    CIRCLE_NODE(LESS, CircleLessSummaryBuilder)
+    CIRCLE_NODE(LESS_EQUAL, CircleLessEqualSummaryBuilder)
+    CIRCLE_NODE(LOCAL_RESPONSE_NORMALIZATION, CircleLocalResponseNormalizationSummaryBuilder)
+    CIRCLE_NODE(LOG, CircleLogSummaryBuilder)
+    CIRCLE_NODE(LOGICAL_AND, CircleLogicalAndSummaryBuilder)
+    CIRCLE_NODE(LOGICAL_NOT, CircleLogicalNotSummaryBuilder)
+    CIRCLE_NODE(LOGICAL_OR, CircleLogicalOrSummaryBuilder)
+    CIRCLE_NODE(LOGISTIC, CircleLogisticSummaryBuilder)
+    CIRCLE_NODE(LOG_SOFTMAX, CircleLogSoftmaxSummaryBuilder)
+    CIRCLE_NODE(MATRIX_DIAG, CircleMatrixDiagSummaryBuilder)
+    CIRCLE_NODE(MATRIX_SET_DIAG, CircleMatrixSetDiagSummaryBuilder)
+    CIRCLE_NODE(MAXIMUM, CircleMaximumSummaryBuilder)
+    CIRCLE_NODE(MAX_POOL_2D, CircleMaxPool2DSummaryBuilder)
+    CIRCLE_NODE(MEAN, CircleMeanSummaryBuilder)
+    CIRCLE_NODE(MINIMUM, CircleMinimumSummaryBuilder)
+    CIRCLE_NODE(MIRROR_PAD, CircleMirrorPadSummaryBuilder)
+    CIRCLE_NODE(MUL, CircleMulSummaryBuilder)
+    CIRCLE_NODE(NEG, CircleNegSummaryBuilder)
+    CIRCLE_NODE(NON_MAX_SUPPRESSION_V4, CircleNonMaxSuppressionV4SummaryBuilder)
+    CIRCLE_NODE(NON_MAX_SUPPRESSION_V5, CircleNonMaxSuppressionV5SummaryBuilder)
+    CIRCLE_NODE(NOT_EQUAL, CircleNotEqualSummaryBuilder)
+    CIRCLE_NODE(ONE_HOT, CircleOneHotSummaryBuilder)
+    CIRCLE_NODE(PACK, CirclePackSummaryBuilder)
+    CIRCLE_NODE(PAD, CirclePadSummaryBuilder)
+    CIRCLE_NODE(PADV2, CirclePadV2SummaryBuilder)
+    CIRCLE_NODE(POW, CirclePowSummaryBuilder)
+    CIRCLE_NODE(PRELU, CirclePReluSummaryBuilder)
+    CIRCLE_NODE(QUANTIZE, CircleQuantizeSummaryBuilder)
+    CIRCLE_NODE(RANGE, CircleRangeSummaryBuilder)
+    CIRCLE_NODE(RANK, CircleRankSummaryBuilder)
+    CIRCLE_NODE(REDUCE_ANY, CircleReduceAnySummaryBuilder)
+    CIRCLE_NODE(REDUCE_MAX, CircleReduceMaxSummaryBuilder)
+    CIRCLE_NODE(REDUCE_MIN, CircleReduceMinSummaryBuilder)
+    CIRCLE_NODE(REDUCE_PROD, CircleReduceProdSummaryBuilder)
+    CIRCLE_NODE(RELU, CircleReluSummaryBuilder)
+    CIRCLE_NODE(RELU6, CircleRelu6SummaryBuilder)
+    CIRCLE_NODE(RELU_N1_TO_1, CircleReluN1To1SummaryBuilder)
+    CIRCLE_NODE(RESHAPE, CircleReshapeSummaryBuilder)
+    CIRCLE_NODE(RESIZE_BILINEAR, CircleResizeBilinearSummaryBuilder)
+    CIRCLE_NODE(RESIZE_NEAREST_NEIGHBOR, CircleResizeNearestNeighborSummaryBuilder)
+    CIRCLE_NODE(REVERSE_SEQUENCE, CircleReverseSequenceSummaryBuilder)
+    CIRCLE_NODE(REVERSE_V2, CircleReverseV2SummaryBuilder)
+    CIRCLE_NODE(ROUND, CircleRoundSummaryBuilder)
+    CIRCLE_NODE(RSQRT, CircleRsqrtSummaryBuilder)
+    CIRCLE_NODE(SCATTER_ND, CircleScatterNdSummaryBuilder)
+    CIRCLE_NODE(SEGMENT_SUM, CircleSegmentSumSummaryBuilder)
+    CIRCLE_NODE(SELECT, CircleSelectSummaryBuilder)
+    CIRCLE_NODE(SELECT_V2, CircleSelectV2SummaryBuilder)
+    CIRCLE_NODE(SHAPE, CircleShapeSummaryBuilder)
+    CIRCLE_NODE(SIN, CircleSinSummaryBuilder)
+    CIRCLE_NODE(SLICE, CircleSliceSummaryBuilder)
+    CIRCLE_NODE(SOFTMAX, CircleSoftmaxSummaryBuilder)
+    CIRCLE_NODE(SPACE_TO_BATCH_ND, CircleSpaceToBatchNDSummaryBuilder)
+    CIRCLE_NODE(SPACE_TO_DEPTH, CircleSpaceToDepthSummaryBuilder)
+    CIRCLE_NODE(SPARSE_TO_DENSE, CircleSparseToDenseSummaryBuilder)
+    CIRCLE_NODE(SPLIT, CircleSplitSummaryBuilder)
+    CIRCLE_NODE(SPLIT_V, CircleSplitVSummaryBuilder)
+    CIRCLE_NODE(SQRT, CircleSqrtSummaryBuilder)
+    CIRCLE_NODE(SQUARE, CircleSquareSummaryBuilder)
+    CIRCLE_NODE(SQUARED_DIFFERENCE, CircleSquaredDifferenceSummaryBuilder)
+    CIRCLE_NODE(SQUEEZE, CircleSqueezeSummaryBuilder)
+    CIRCLE_NODE(STRIDED_SLICE, CircleStridedSliceSummaryBuilder)
+    CIRCLE_NODE(SUB, CircleSubSummaryBuilder)
+    CIRCLE_NODE(SUM, CircleSumSummaryBuilder)
+    CIRCLE_NODE(SVDF, CircleSVDFSummaryBuilder)
+    CIRCLE_NODE(TANH, CircleTanhSummaryBuilder)
+    CIRCLE_NODE(TILE, CircleTileSummaryBuilder)
+    CIRCLE_NODE(TOPK_V2, CircleTopKV2SummaryBuilder)
+    CIRCLE_NODE(TRANSPOSE, CircleTransposeSummaryBuilder)
+    CIRCLE_NODE(TRANSPOSE_CONV, CircleTransposeConvSummaryBuilder)
+    CIRCLE_NODE(UNIDIRECTIONAL_SEQUENCE_LSTM, CircleUnidirectionalSequenceLSTMSummaryBuilder)
+    CIRCLE_NODE(UNIQUE, CircleUniqueSummaryBuilder)
+    CIRCLE_NODE(UNPACK, CircleUnpackSummaryBuilder)
+    CIRCLE_NODE(WHERE, CircleWhereSummaryBuilder)
+    CIRCLE_NODE(WHILE, CircleWhileSummaryBuilder)
+    CIRCLE_NODE(ZEROS_LIKE, CircleZerosLikeSummaryBuilder)
+
+    CIRCLE_NODE(CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT,
+                CircleBidirectionalSequenceLSTMOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLECUSTOMOUT, CircleCustomOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLEIFOUT, CircleIfOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLEINPUT, CircleInputSummaryBuilder)
+    CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV4OUT, CircleNonMaxSuppressionV4OutSummaryBuilder)
+    CIRCLE_NODE(CIRCLENONMAXSUPPRESSIONV5OUT, CircleNonMaxSuppressionV5OutSummaryBuilder)
+    CIRCLE_NODE(CIRCLEOUTPUT, CircleOutputSummaryBuilder)
+    CIRCLE_NODE(CIRCLEOUTPUTDUMMY, CircleOutputDummySummaryBuilder)
+    CIRCLE_NODE(CIRCLEOUTPUTEXCLUDE, CircleOutputExcludeSummaryBuilder)
+    CIRCLE_NODE(CIRCLESPLITOUT, CircleSplitOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLESPLITVOUT, CircleSplitVOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLETOPKV2OUT, CircleTopKV2OutSummaryBuilder)
+    CIRCLE_NODE(CIRCLEUNIQUEOUT, CircleUniqueOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLEUNPACKOUT, CircleUnpackOutSummaryBuilder)
+    CIRCLE_NODE(CIRCLEVARIABLE, CircleVariableSummaryBuilder)
+    CIRCLE_NODE(CIRCLEWHILEOUT, CircleWhileOutSummaryBuilder)
+
+    default:
+      return nullptr;
+
+#undef CIRCLE_NODE
+  }
+}
+
+} // namespace luci
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.h b/compiler/luci/logex/src/CircleNodeSummaryBuilder.h
new file mode 100644 (file)
index 0000000..e21d773
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
+#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
+
+#include <luci/IR/CircleNode.h>
+#include <locop/NodeSummary.h>
+#include <locop/SymbolTable.h>
+
+#include <memory>
+#include <sstream>
+#include <vector>
+
+namespace luci
+{
+
+class CircleNodeSummaryBuilder
+{
+public:
+  bool build(const loco::Node *node, const locop::SymbolTable *tbl, locop::NodeSummary &s);
+
+private:
+  /**
+   * @brief Template methods for building node summary.
+   *        Default behavior is building a node which has no input.
+   */
+  virtual bool validate(const luci::CircleNode *node);
+  virtual std::vector<std::string> get_input_names(const luci::CircleNode *node);
+  virtual void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+  virtual void update_status(locop::NodeSummary &s);
+
+private:
+  std::unique_ptr<CircleNodeSummaryBuilder> create_builder(const luci::CircleNode *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDER__
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilder.test.cpp
new file mode 100644 (file)
index 0000000..89ea213
--- /dev/null
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilder.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <locop/NodeSummary.h>
+#include <locop/SymbolTable.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class MockSymbolTable : public locop::SymbolTable
+{
+  std::string lookup(const loco::Node *) const override
+  {
+    return "Do nothing because it is mocking Symbol Table!";
+  }
+};
+
+class CircleNodeSummaryBuilderTest : public ::testing::Test
+{
+protected:
+  bool mock_build(const loco::Node *node)
+  {
+    return luci::CircleNodeSummaryBuilder().build(node, &_tbl, _s);
+  }
+
+protected:
+  MockSymbolTable _tbl;
+  locop::NodeSummary _s;
+};
+
+} // namespace
+
+TEST_F(CircleNodeSummaryBuilderTest, Add_validate)
+{
+  luci::CircleAdd node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Add_validate_fused_NEG)
+{
+  luci::CircleAdd node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate)
+{
+  luci::CircleAveragePool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::SAME);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_fused_NEG)
+{
+  luci::CircleAveragePool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  node.padding(luci::Padding::SAME);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, AveragePool2D_validate_padding_NEG)
+{
+  luci::CircleAveragePool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate)
+{
+  luci::CircleBCQFullyConnected node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, BCQFullyConnected_validate_fused_NEG)
+{
+  luci::CircleBCQFullyConnected node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate)
+{
+  luci::CircleConcatenation node(2);
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Concatenation_validate_fused_NEG)
+{
+  luci::CircleConcatenation node(2);
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate)
+{
+  luci::CircleConv2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::SAME);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_fused_NEG)
+{
+  luci::CircleConv2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  node.padding(luci::Padding::SAME);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Conv2D_validate_padding_NEG)
+{
+  luci::CircleConv2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate)
+{
+  luci::CircleDepthwiseConv2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::SAME);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_fused_NEG)
+{
+  luci::CircleDepthwiseConv2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  node.padding(luci::Padding::SAME);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, DepthwiseConv2D_validate_padding_NEG)
+{
+  luci::CircleDepthwiseConv2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate)
+{
+  luci::CircleFullyConnected node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, FullyConnected_validate_fused_NEG)
+{
+  luci::CircleFullyConnected node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate)
+{
+  luci::CircleInstanceNorm node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, InstanceNorm_validate_fused_NEG)
+{
+  luci::CircleInstanceNorm node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate)
+{
+  luci::CircleL2Normalize node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Normalize_validate_fused_NEG)
+{
+  luci::CircleL2Normalize node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate)
+{
+  luci::CircleL2Pool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::SAME);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_fused_NEG)
+{
+  luci::CircleL2Pool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  node.padding(luci::Padding::SAME);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, L2Pool2D_validate_padding_NEG)
+{
+  luci::CircleL2Pool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate)
+{
+  luci::CircleMaxPool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::SAME);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_fused_NEG)
+{
+  luci::CircleMaxPool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  node.padding(luci::Padding::SAME);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MaxPool2D_validate_padding_NEG)
+{
+  luci::CircleMaxPool2D node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  node.padding(luci::Padding::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate)
+{
+  luci::CircleMirrorPad node;
+  node.mode(luci::MirrorPadMode::REFLECT);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, MirrorPad_validate_mirror_padding_NEG)
+{
+  luci::CircleMirrorPad node;
+  node.mode(luci::MirrorPadMode::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Mul_validate)
+{
+  luci::CircleMul node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, Mul_validate_fused_NEG)
+{
+  luci::CircleMul node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate)
+{
+  luci::CircleSVDF node;
+  node.fusedActivationFunction(luci::FusedActFunc::RELU);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, SVDF_validate_fused_NEG)
+{
+  luci::CircleSVDF node;
+  node.fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate)
+{
+  luci::CircleTransposeConv node;
+  node.padding(luci::Padding::SAME);
+  EXPECT_TRUE(mock_build(&node));
+}
+
+TEST_F(CircleNodeSummaryBuilderTest, TransposeConv_validate_padding_NEG)
+{
+  luci::CircleTransposeConv node;
+  node.padding(luci::Padding::UNDEFINED);
+  EXPECT_FALSE(mock_build(&node));
+}
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp b/compiler/luci/logex/src/CircleNodeSummaryBuilders.cpp
new file mode 100644 (file)
index 0000000..6df9270
--- /dev/null
@@ -0,0 +1,1128 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleNodeSummaryBuilders.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/IR/CircleNodes.h>
+#include <loco/IR/Node.h>
+
+#include <string>
+#include <vector>
+
+namespace
+{
+
+std::string to_str(loco::DataType type)
+{
+  switch (type)
+  {
+    case loco::DataType::U8:
+      return "UINT8";
+    case loco::DataType::U16:
+      return "UINT16";
+    case loco::DataType::U32:
+      return "UINT32";
+    case loco::DataType::U64:
+      return "UINT64";
+
+    case loco::DataType::S8:
+      return "INT8";
+    case loco::DataType::S16:
+      return "INT16";
+    case loco::DataType::S32:
+      return "INT32";
+    case loco::DataType::S64:
+      return "INT64";
+
+    case loco::DataType::FLOAT16:
+      return "FLOAT16";
+    case loco::DataType::FLOAT32:
+      return "FLOAT32";
+    case loco::DataType::FLOAT64:
+      return "FLOAT64";
+
+    case loco::DataType::BOOL:
+      return "BOOL";
+
+    default:
+      return "Error";
+  }
+}
+
+std::string to_str(bool value) { return value ? "true" : "false"; }
+
+std::string to_str(luci::FusedActFunc fused)
+{
+  switch (fused)
+  {
+    case luci::FusedActFunc::NONE:
+      return "NONE";
+    case luci::FusedActFunc::RELU:
+      return "RELU";
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      return "RELU_N1_TO_1";
+    case luci::FusedActFunc::RELU6:
+      return "RELU6";
+    case luci::FusedActFunc::TANH:
+      return "TANH";
+    case luci::FusedActFunc::SIGN_BIT:
+      return "SIGN_BIT";
+    default:
+      return "Error";
+  }
+}
+
+std::string to_str(luci::Padding padding)
+{
+  switch (padding)
+  {
+    case luci::Padding::SAME:
+      return "SAME";
+    case luci::Padding::VALID:
+      return "VALID";
+    default:
+      return "Error";
+  }
+}
+
+std::string to_str(const luci::Stride *stride)
+{
+  return std::to_string(stride->h()) + "," + std::to_string(stride->w());
+}
+
+std::string to_str(const luci::Filter *filter)
+{
+  return std::to_string(filter->h()) + "," + std::to_string(filter->w());
+}
+
+std::string to_str(luci::MirrorPadMode mode)
+{
+  switch (mode)
+  {
+    case luci::MirrorPadMode::REFLECT:
+      return "REFLECT";
+    case luci::MirrorPadMode::SYMMETRIC:
+      return "SYMMETRIC";
+    default:
+      return "Error";
+  }
+}
+
+} // namespace
+
+namespace luci
+{
+
+std::vector<std::string> CircleNodeWithXSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"x"};
+}
+
+std::vector<std::string>
+CircleNodeWithINPUTSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input"};
+}
+
+std::vector<std::string> CircleNodeWithXYSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"x", "y"};
+}
+
+std::vector<std::string>
+CircleNodeWithFEATURESSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"features"};
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool CircleAddSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto add = loco::must_cast<const luci::CircleAdd *>(node);
+  if (add->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+void CircleAddSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+  auto add = loco::must_cast<const luci::CircleAdd *>(node);
+  s.args().append("fused_activation_function", to_str(add->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleAddNSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+  return std::vector<std::string>(node->arity(), "inputs");
+}
+
+std::vector<std::string> CircleArgMaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "dimension"};
+}
+
+void CircleArgMaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto argmax = loco::must_cast<const luci::CircleArgMax *>(node);
+  s.args().append("output_type", to_str(argmax->output_type()));
+}
+
+std::vector<std::string> CircleArgMinSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "dimension"};
+}
+
+void CircleArgMinSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto argmin = loco::must_cast<const luci::CircleArgMin *>(node);
+  s.args().append("output_type", to_str(argmin->output_type()));
+}
+
+bool CircleAveragePool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node);
+  if (avgpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+  if (avgpool->padding() == luci::Padding::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string>
+CircleAveragePool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"value"};
+}
+
+void CircleAveragePool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                         locop::NodeSummary &s)
+{
+  auto avgpool = loco::must_cast<const luci::CircleAveragePool2D *>(node);
+  s.args().append("filter(h,w)", to_str(avgpool->filter()));
+  s.args().append("stride(h,w)", to_str(avgpool->stride()));
+  s.args().append("padding", to_str(avgpool->padding()));
+  s.args().append("fused_activation_function", to_str(avgpool->fusedActivationFunction()));
+}
+
+void CircleBatchMatMulSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                       locop::NodeSummary &s)
+{
+  auto batchmatmul = loco::must_cast<const luci::CircleBatchMatMul *>(node);
+  s.args().append("adj_x", to_str(batchmatmul->adj_x()));
+  s.args().append("adj_y", to_str(batchmatmul->adj_y()));
+}
+
+std::vector<std::string>
+CircleBatchToSpaceNDSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "block_shape", "crops"};
+}
+
+bool CircleBCQFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node);
+  if (bcq_fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string>
+CircleBCQFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "weights_scales", "weights_binary", "bias", "weights_clusters"};
+}
+
+void CircleBCQFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                             locop::NodeSummary &s)
+{
+  auto bcq_fc = loco::must_cast<const luci::CircleBCQFullyConnected *>(node);
+  s.args().append("fused_activation_function", to_str(bcq_fc->fusedActivationFunction()));
+  s.args().append("weights_hidden_size", std::to_string(bcq_fc->weights_hidden_size()));
+}
+
+std::vector<std::string> CircleBCQGatherSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input_scales", "input_binary", "indices", "input_clusters"};
+}
+
+void CircleBCQGatherSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                     locop::NodeSummary &s)
+{
+  auto bcq_gather = loco::must_cast<const luci::CircleBCQGather *>(node);
+  s.args().append("axis", std::to_string(bcq_gather->axis()));
+  s.args().append("input_hidden_size", std::to_string(bcq_gather->input_hidden_size()));
+}
+
+std::vector<std::string>
+CircleBidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input",
+          "fw_input_to_input_weights",
+          "fw_input_to_forget_weights",
+          "fw_input_to_cell_weights",
+          "fw_input_to_output_weights",
+          "fw_recurrent_to_input_weights",
+          "fw_recurrent_to_forget_weights",
+          "fw_recurrent_to_cell_weights",
+          "fw_recurrent_to_output_weights",
+          "fw_cell_to_input_weights",
+          "fw_cell_to_forget_weights",
+          "fw_cell_to_output_weights",
+          "fw_input_gate_bias",
+          "fw_forget_gate_bias",
+          "fw_cell_gate_bias",
+          "fw_output_gate_bias",
+          "fw_projection_weights",
+          "fw_projection_bias",
+          "bw_input_to_input_weights",
+          "bw_input_to_forget_weights",
+          "bw_input_to_cell_weights",
+          "bw_input_to_output_weights",
+          "bw_recurrent_to_input_weights",
+          "bw_recurrent_to_forget_weights",
+          "bw_recurrent_to_cell_weights",
+          "bw_recurrent_to_output_weights",
+          "bw_cell_to_input_weights",
+          "bw_cell_to_forget_weights",
+          "bw_cell_to_output_weights",
+          "bw_input_gate_bias",
+          "bw_forget_gate_bias",
+          "bw_cell_gate_bias",
+          "bw_output_gate_bias",
+          "bw_projection_weights",
+          "bw_projection_bias",
+          "fw_activation_state",
+          "fw_cell_state",
+          "bw_activation_state",
+          "bw_cell_state",
+          "auxillary_input",
+          "fw_auxillary_input_to_input_weights",
+          "fw_auxillary_input_to_forget_weights",
+          "fw_auxillary_input_to_cell_weights",
+          "fw_auxillary_input_to_output_weights",
+          "bw_auxillary_input_to_input_weights",
+          "bw_auxillary_input_to_forget_weights",
+          "bw_auxillary_input_to_cell_weights",
+          "bw_auxillary_input_to_output_weights"};
+}
+
+void CircleBidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                                     locop::NodeSummary &s)
+{
+  auto lstm = loco::must_cast<const luci::CircleBidirectionalSequenceLSTM *>(node);
+  s.args().append("cell_clip", to_str(lstm->cell_clip()));
+  s.args().append("proj_clip", to_str(lstm->proj_clip()));
+  s.args().append("merge_outputs", to_str(lstm->merge_outputs()));
+  s.args().append("time_major", to_str(lstm->time_major()));
+  s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs()));
+}
+
+std::vector<std::string> CircleCastSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"x"};
+}
+
+void CircleCastSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+  auto cast = loco::must_cast<const luci::CircleCast *>(node);
+  s.args().append("in_data_type", to_str(cast->in_data_type()));
+  s.args().append("out_data_type", to_str(cast->out_data_type()));
+}
+
+bool CircleConcatenationSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto concat = loco::must_cast<const luci::CircleConcatenation *>(node);
+  if (concat->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string>
+CircleConcatenationSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+  return std::vector<std::string>(node->arity(), "values");
+}
+
+void CircleConcatenationSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                         locop::NodeSummary &s)
+{
+  auto concat = loco::must_cast<const luci::CircleConcatenation *>(node);
+  s.args().append("axis", std::to_string(concat->axis()));
+  s.args().append("fused_activation_function", to_str(concat->fusedActivationFunction()));
+}
+
+void CircleConstSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+  s.state(locop::NodeDesc::State::PartiallyKnown);
+}
+
+bool CircleConv2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
+  if (conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+  if (conv2d->padding() == luci::Padding::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleConv2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "filter", "bias"};
+}
+
+void CircleConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto conv2d = loco::must_cast<const luci::CircleConv2D *>(node);
+  s.args().append("stride(h,w)", to_str(conv2d->stride()));
+  s.args().append("dilation(h,w)", to_str(conv2d->dilation()));
+  s.args().append("padding", to_str(conv2d->padding()));
+  s.args().append("fused_activation_function", to_str(conv2d->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleCustomSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+  auto input_names = std::vector<std::string>();
+  for (uint32_t i = 0; i < node->arity(); ++i)
+    input_names.push_back("input" + std::to_string(i));
+  return input_names;
+}
+
+void CircleCustomSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto custom = loco::must_cast<const luci::CircleCustom *>(node);
+  s.args().append("custom_code", custom->custom_code());
+}
+
+void CircleDepthToSpaceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                        locop::NodeSummary &s)
+{
+  auto depth_to_space = loco::must_cast<const luci::CircleDepthToSpace *>(node);
+  s.args().append("block_size", std::to_string(depth_to_space->block_size()));
+}
+
+bool CircleDepthwiseConv2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node);
+  if (dw_conv2d->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+  if (dw_conv2d->padding() == luci::Padding::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string>
+CircleDepthwiseConv2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "filter", "bias"};
+}
+
+void CircleDepthwiseConv2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                           locop::NodeSummary &s)
+{
+  auto dw_conv2d = loco::must_cast<const luci::CircleDepthwiseConv2D *>(node);
+  s.args().append("stride(h,w)", to_str(dw_conv2d->stride()));
+  s.args().append("dilation(h,w)", to_str(dw_conv2d->dilation()));
+  s.args().append("padding", to_str(dw_conv2d->padding()));
+  s.args().append("depthMultiplier", std::to_string(dw_conv2d->depthMultiplier()));
+  s.args().append("fused_activation_function", to_str(dw_conv2d->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleExpandDimsSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "axis"};
+}
+
+std::vector<std::string> CircleFakeQuantSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"inputs"};
+}
+
+void CircleFakeQuantSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                     locop::NodeSummary &s)
+{
+  auto fake_quant = loco::must_cast<const luci::CircleFakeQuant *>(node);
+  s.args().append("min", std::to_string(fake_quant->min()));
+  s.args().append("max", std::to_string(fake_quant->max()));
+  s.args().append("num_bits", std::to_string(fake_quant->num_bits()));
+  s.args().append("narrow_range", to_str(fake_quant->narrow_range()));
+}
+
+std::vector<std::string> CircleFillSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"dims", "value"};
+}
+
+bool CircleFullyConnectedSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node);
+  if (fc->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string>
+CircleFullyConnectedSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "weights", "bias"};
+}
+
+void CircleFullyConnectedSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                          locop::NodeSummary &s)
+{
+  auto fc = loco::must_cast<const luci::CircleFullyConnected *>(node);
+  s.args().append("fused_activation_function", to_str(fc->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleGatherSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"params", "indices"};
+}
+
+void CircleGatherSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto gather = loco::must_cast<const luci::CircleGather *>(node);
+  s.args().append("axis", std::to_string(gather->axis()));
+}
+
+std::vector<std::string> CircleGatherNdSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"params", "indices"};
+}
+
+std::vector<std::string> CircleIfSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+  auto circle_if = loco::must_cast<const luci::CircleIf *>(node);
+
+  auto input_names = std::vector<std::string>();
+  input_names.push_back("cond");
+  for (uint32_t i = 0; i < circle_if->input_count(); ++i)
+    input_names.push_back("input");
+
+  return input_names;
+}
+
+void CircleIfSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+  auto circle_if = loco::must_cast<const luci::CircleIf *>(node);
+
+  if (circle_if->then_graph() != nullptr)
+    s.args().append("then_graph", circle_if->then_graph()->name());
+  else
+    s.args().append("then_branch", std::to_string(circle_if->then_branch()));
+
+  if (circle_if->else_graph() != nullptr)
+    s.args().append("else_graph", circle_if->else_graph()->name());
+  else
+    s.args().append("else_branch", std::to_string(circle_if->else_branch()));
+}
+
+bool CircleInstanceNormSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node);
+  if (instnorm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleInstanceNormSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "gamma", "beta"};
+}
+
+void CircleInstanceNormSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                        locop::NodeSummary &s)
+{
+  auto instnorm = loco::must_cast<const luci::CircleInstanceNorm *>(node);
+  s.args().append("epsilon", std::to_string(instnorm->epsilon()));
+  s.args().append("fused_activation_function", to_str(instnorm->fusedActivationFunction()));
+}
+
+bool CircleL2NormalizeSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node);
+  if (l2norm->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleL2NormalizeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"x"};
+}
+
+void CircleL2NormalizeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                       locop::NodeSummary &s)
+{
+  auto l2norm = loco::must_cast<const luci::CircleL2Normalize *>(node);
+  s.args().append("fused_activation_function", to_str(l2norm->fusedActivationFunction()));
+}
+
+bool CircleL2Pool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node);
+  if (l2pool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+  if (l2pool->padding() == luci::Padding::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleL2Pool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"value"};
+}
+
+void CircleL2Pool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                    locop::NodeSummary &s)
+{
+  auto l2pool = loco::must_cast<const luci::CircleL2Pool2D *>(node);
+  s.args().append("filter(h,w)", to_str(l2pool->filter()));
+  s.args().append("stride(h,w)", to_str(l2pool->stride()));
+  s.args().append("padding", to_str(l2pool->padding()));
+  s.args().append("fused_activation_function", to_str(l2pool->fusedActivationFunction()));
+}
+
+void CircleLeakyReluSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                     locop::NodeSummary &s)
+{
+  auto leaky_relu = loco::must_cast<const luci::CircleLeakyRelu *>(node);
+  s.args().append("alpha", std::to_string(leaky_relu->alpha()));
+}
+
+void CircleLocalResponseNormalizationSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                                      locop::NodeSummary &s)
+{
+  auto lrn = loco::must_cast<const luci::CircleLocalResponseNormalization *>(node);
+  s.args().append("radius", std::to_string(lrn->radius()));
+  s.args().append("bias", std::to_string(lrn->bias()));
+  s.args().append("alpha", std::to_string(lrn->alpha()));
+  s.args().append("beta", std::to_string(lrn->beta()));
+}
+
+std::vector<std::string> CircleLogSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"logits"};
+}
+
+std::vector<std::string> CircleMatrixDiagSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"diagonal"};
+}
+
+std::vector<std::string>
+CircleMatrixSetDiagSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "diagonal"};
+}
+
+bool CircleMaxPool2DSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node);
+  if (maxpool->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+  if (maxpool->padding() == luci::Padding::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleMaxPool2DSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"value"};
+}
+
+void CircleMaxPool2DSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                     locop::NodeSummary &s)
+{
+  auto maxpool = loco::must_cast<const luci::CircleMaxPool2D *>(node);
+  s.args().append("filter(h,w)", to_str(maxpool->filter()));
+  s.args().append("stride(h,w)", to_str(maxpool->stride()));
+  s.args().append("padding", to_str(maxpool->padding()));
+  s.args().append("fused_activation_function", to_str(maxpool->fusedActivationFunction()));
+}
+
+bool CircleMirrorPadSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node);
+  if (mirror_pad->mode() == luci::MirrorPadMode::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleMirrorPadSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "paddings"};
+}
+
+void CircleMirrorPadSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                     locop::NodeSummary &s)
+{
+  auto mirror_pad = loco::must_cast<const luci::CircleMirrorPad *>(node);
+  s.args().append("mode", to_str(mirror_pad->mode()));
+}
+
+bool CircleMulSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  // Reject nodes whose fused activation was not decoded; the summary would
+  // otherwise print a meaningless UNDEFINED attribute.
+  auto mul = loco::must_cast<const luci::CircleMul *>(node);
+  if (mul->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+// Append Mul's single attribute (fused activation) to the summary.
+void CircleMulSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+  auto mul = loco::must_cast<const luci::CircleMul *>(node);
+  s.args().append("fused_activation_function", to_str(mul->fusedActivationFunction()));
+}
+
+std::vector<std::string>
+CircleNonMaxSuppressionV4SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"boxes", "scores", "max_output_size", "iou_threshold", "score_threshold"};
+}
+
+std::vector<std::string>
+CircleNonMaxSuppressionV5SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"boxes",         "scores",          "max_output_size",
+          "iou_threshold", "score_threshold", "soft_nms_sigma"};
+}
+
+std::vector<std::string> CircleOneHotSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"indices", "depth", "on_value", "off_value"};
+}
+
+void CircleOneHotSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto onehot = loco::must_cast<const luci::CircleOneHot *>(node);
+  s.args().append("axis", std::to_string(onehot->axis()));
+}
+
+std::vector<std::string> CirclePackSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+  return std::vector<std::string>(node->arity(), "values");
+}
+
+void CirclePackSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+  auto pack = loco::must_cast<const luci::CirclePack *>(node);
+  s.args().append("values_count", std::to_string(pack->values_count()));
+  s.args().append("axis", std::to_string(pack->axis()));
+}
+
+std::vector<std::string> CirclePadSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "paddings"};
+}
+
+std::vector<std::string> CirclePadV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "paddings", "constant_values"};
+}
+
+std::vector<std::string> CirclePReluSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "alpha"};
+}
+
+std::vector<std::string> CircleRangeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"start", "limit", "delta"};
+}
+
+std::vector<std::string> CircleReshapeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"tensor", "shape"};
+}
+
+void CircleReshapeSummaryBuilder::update_status(locop::NodeSummary &s)
+{
+  s.state(locop::NodeDesc::State::PartiallyKnown);
+}
+
+std::vector<std::string>
+CircleResizeBilinearSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "size"};
+}
+
+void CircleResizeBilinearSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                          locop::NodeSummary &s)
+{
+  auto resize_bilinear = loco::must_cast<const luci::CircleResizeBilinear *>(node);
+  s.args().append("align_corners", to_str(resize_bilinear->align_corners()));
+  s.args().append("half_pixel_centers", to_str(resize_bilinear->half_pixel_centers()));
+}
+
+std::vector<std::string>
+CircleResizeNearestNeighborSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "size"};
+}
+
+void CircleResizeNearestNeighborSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                                 locop::NodeSummary &s)
+{
+  auto resize_nn = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(node);
+  s.args().append("align_corners", to_str(resize_nn->align_corners()));
+}
+
+std::vector<std::string>
+CircleReverseSequenceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "seq_lengths"};
+}
+
+void CircleReverseSequenceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                           locop::NodeSummary &s)
+{
+  auto reverse_seq = loco::must_cast<const luci::CircleReverseSequence *>(node);
+  s.args().append("seq_axis", std::to_string(reverse_seq->seq_axis()));
+  s.args().append("batch_axis", std::to_string(reverse_seq->batch_axis()));
+}
+
+std::vector<std::string> CircleReverseV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"tensor", "axis"};
+}
+
+std::vector<std::string> CircleScatterNdSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"indices", "updates", "shape"};
+}
+
+std::vector<std::string> CircleSegmentSumSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "segment_ids"};
+}
+
+std::vector<std::string> CircleSelectSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"condition", "t", "e"};
+}
+
+std::vector<std::string> CircleSelectV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"condition", "t", "e"};
+}
+
+void CircleShapeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                 locop::NodeSummary &s)
+{
+  auto shape = loco::must_cast<const luci::CircleShape *>(node);
+  s.args().append("out_type", to_str(shape->out_type()));
+}
+
+std::vector<std::string> CircleSliceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "begin", "size"};
+}
+
+std::vector<std::string> CircleSoftmaxSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"logits"};
+}
+
+void CircleSoftmaxSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                   locop::NodeSummary &s)
+{
+  auto softmax = loco::must_cast<const luci::CircleSoftmax *>(node);
+  s.args().append("beta", to_str(softmax->beta()));
+}
+
+std::vector<std::string>
+CircleSpaceToBatchNDSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "block_shape", "paddings"};
+}
+
+void CircleSpaceToDepthSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                        locop::NodeSummary &s)
+{
+  auto space_to_depth = loco::must_cast<const luci::CircleSpaceToDepth *>(node);
+  s.args().append("block_size", to_str(space_to_depth->block_size()));
+}
+
+std::vector<std::string>
+CircleSparseToDenseSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"indices", "output_shape", "values", "default_value"};
+}
+
+void CircleSparseToDenseSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                         locop::NodeSummary &s)
+{
+  auto sparse_to_dense = loco::must_cast<const luci::CircleSparseToDense *>(node);
+  s.args().append("validate_indices", to_str(sparse_to_dense->validate_indices()));
+}
+
+std::vector<std::string> CircleSplitSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"split_dim", "input"};
+}
+
+void CircleSplitSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                 locop::NodeSummary &s)
+{
+  auto split = loco::must_cast<const luci::CircleSplit *>(node);
+  s.args().append("num_split", std::to_string(split->num_split()));
+}
+
+std::vector<std::string> CircleSplitVSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "size_splits", "split_dim"};
+}
+
+void CircleSplitVSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto split_v = loco::must_cast<const luci::CircleSplitV *>(node);
+  s.args().append("num_split", std::to_string(split_v->num_split()));
+}
+
+void CircleSqueezeSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                   locop::NodeSummary &s)
+{
+  auto squeeze = loco::must_cast<const luci::CircleSqueeze *>(node);
+
+  std::string squeeze_dims = "(";
+  for (size_t i = 0; i < squeeze->squeeze_dims().size(); ++i)
+  {
+    if (i != 0)
+      squeeze_dims += ", ";
+    squeeze_dims += std::to_string(squeeze->squeeze_dims().at(i));
+  }
+  squeeze_dims += ")";
+
+  s.args().append("squeeze_dims", squeeze_dims);
+}
+
+std::vector<std::string> CircleStridedSliceSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "begin", "end", "strides"};
+}
+
+void CircleStridedSliceSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                        locop::NodeSummary &s)
+{
+  // The five masks are bitfields; print them as raw integers, matching the
+  // TFLite StridedSlice option names.
+  auto strided_slice = loco::must_cast<const luci::CircleStridedSlice *>(node);
+  s.args().append("begin_mask", std::to_string(strided_slice->begin_mask()));
+  s.args().append("end_mask", std::to_string(strided_slice->end_mask()));
+  s.args().append("ellipsis_mask", std::to_string(strided_slice->ellipsis_mask()));
+  s.args().append("new_axis_mask", std::to_string(strided_slice->new_axis_mask()));
+  s.args().append("shrink_axis_mask", std::to_string(strided_slice->shrink_axis_mask()));
+}
+
+bool CircleSVDFSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto svdf = loco::must_cast<const luci::CircleSVDF *>(node);
+  if (svdf->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string> CircleSVDFSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  // Labels follow the TFLite SVDF operand order. "state" was previously
+  // capitalized ("State"), inconsistent with every other lower-case input
+  // label emitted by this file.
+  return {"input", "weight_feature", "weight_time", "bias", "state"};
+}
+
+void CircleSVDFSummaryBuilder::build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+{
+  auto svdf = loco::must_cast<const luci::CircleSVDF *>(node);
+  s.args().append("rank", to_str(svdf->svdf_rank()));
+  s.args().append("asymmetric_quantize_inputs", to_str(svdf->asymmetric_quantize_inputs()));
+  s.args().append("fused_activation_function", to_str(svdf->fusedActivationFunction()));
+}
+
+std::vector<std::string> CircleTileSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "multiples"};
+}
+
+std::vector<std::string> CircleTopKV2SummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"input", "k"};
+}
+
+std::vector<std::string> CircleTransposeSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"a", "perm"};
+}
+
+bool CircleTransposeConvSummaryBuilder::validate(const luci::CircleNode *node)
+{
+  auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node);
+  if (transpose_conv->padding() == luci::Padding::UNDEFINED)
+    return false;
+
+  return true;
+}
+
+std::vector<std::string>
+CircleTransposeConvSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"inputSizes", "filter", "outBackProp", "bias"};
+}
+
+void CircleTransposeConvSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                         locop::NodeSummary &s)
+{
+  auto transpose_conv = loco::must_cast<const luci::CircleTransposeConv *>(node);
+  s.args().append("stride(h,w)", to_str(transpose_conv->stride()));
+  s.args().append("padding", to_str(transpose_conv->padding()));
+}
+
+std::vector<std::string>
+CircleUnidirectionalSequenceLSTMSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  // 24 labels, one per LSTM operand, in the TFLite UnidirectionalSequenceLSTM
+  // operand order (gates, recurrent/peephole weights, biases, projection,
+  // states, then layer-norm coefficients).
+  return {"input",
+          "input_to_input_weights",
+          "input_to_forget_weights",
+          "input_to_cell_weights",
+          "input_to_output_weights",
+          "recurrent_to_input_weights",
+          "recurrent_to_forget_weights",
+          "recurrent_to_cell_weights",
+          "recurrent_to_output_weights",
+          "cell_to_input_weights",
+          "cell_to_forget_weights",
+          "cell_to_output_weights",
+          "input_gate_bias",
+          "forget_gate_bias",
+          "cell_gate_bias",
+          "output_gate_bias",
+          "projection_weights",
+          "projection_bias",
+          "activation_state",
+          "cell_state",
+          "input_layer_norm_coefficients",
+          "forget_layer_norm_coefficients",
+          "cell_layer_norm_coefficients",
+          "output_layer_norm_coefficients"};
+}
+
+void CircleUnidirectionalSequenceLSTMSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                                      locop::NodeSummary &s)
+{
+  auto lstm = loco::must_cast<const luci::CircleUnidirectionalSequenceLSTM *>(node);
+  s.args().append("cell_clip", to_str(lstm->cell_clip()));
+  s.args().append("proj_clip", to_str(lstm->proj_clip()));
+  s.args().append("time_major", to_str(lstm->time_major()));
+  s.args().append("asymmetric_quantize_inputs", to_str(lstm->asymmetric_quantize_inputs()));
+}
+
+void CircleUniqueSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto unique = loco::must_cast<const luci::CircleUnique *>(node);
+  s.args().append("idx_out_type", to_str(unique->idx_out_type()));
+}
+
+std::vector<std::string> CircleUnpackSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"value"};
+}
+
+void CircleUnpackSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                  locop::NodeSummary &s)
+{
+  auto unpack = loco::must_cast<const luci::CircleUnpack *>(node);
+  s.args().append("num", std::to_string(unpack->num()));
+  s.args().append("axis", std::to_string(unpack->axis()));
+}
+std::vector<std::string> CircleWhereSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"condition"};
+}
+
+std::vector<std::string> CircleWhileSummaryBuilder::get_input_names(const luci::CircleNode *node)
+{
+  // Every operand of a While node is displayed under the same "input" label,
+  // one entry per operand (same shape as CirclePackSummaryBuilder).
+  auto circle_while = loco::must_cast<const luci::CircleWhile *>(node);
+  return std::vector<std::string>(circle_while->input_count(), "input");
+}
+
+void CircleWhileSummaryBuilder::build_attributes(const luci::CircleNode *node,
+                                                 locop::NodeSummary &s)
+{
+  auto circle_while = loco::must_cast<const luci::CircleWhile *>(node);
+
+  if (circle_while->cond_graph() != nullptr)
+    s.args().append("then_graph", circle_while->cond_graph()->name());
+  else
+    s.args().append("then_branch", std::to_string(circle_while->cond_branch()));
+
+  if (circle_while->body_graph() != nullptr)
+    s.args().append("else_graph", circle_while->body_graph()->name());
+  else
+    s.args().append("else_branch", std::to_string(circle_while->body_branch()));
+}
+
+std::vector<std::string> CircleOutputSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"from"};
+}
+
+std::vector<std::string> CircleTopKV2OutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"topkv2"};
+}
+
+std::vector<std::string> CircleUniqueOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"unique"};
+}
+
+std::vector<std::string> CircleUnpackOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"unpack"};
+}
+
+std::vector<std::string> CircleWhileOutSummaryBuilder::get_input_names(const luci::CircleNode *)
+{
+  return {"while"};
+}
+
+} // namespace luci
diff --git a/compiler/luci/logex/src/CircleNodeSummaryBuilders.h b/compiler/luci/logex/src/CircleNodeSummaryBuilders.h
new file mode 100644 (file)
index 0000000..6cd24b7
--- /dev/null
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
+#define __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
+
+#include "CircleNodeSummaryBuilder.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+// Shared mixin builders: each supplies the canonical input label(s) for a
+// common node shape, so concrete builders below only derive from one of them.
+
+// Single input named "x" (e.g. CircleAbs, CircleCos, CircleLog).
+class CircleNodeWithXSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+// Single input named "input" (e.g. CircleDequantize, CircleRank).
+class CircleNodeWithINPUTSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+// Two inputs named "x" and "y" (binary ops such as CircleAdd, CircleMul).
+class CircleNodeWithXYSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+// Single input named "features" (activations such as CircleRelu, CircleElu).
+class CircleNodeWithFEATURESSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+// Mixin for reducer-style nodes (Mean, ReduceMax, ReduceMin, ...): they all
+// take {input, reduction_indices} and expose a keep_dims flag.
+template <class REDUCER_NODE>
+class CircleNodeWithReducerSummaryBuilder : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *)
+  {
+    return {"input", "reduction_indices"};
+  }
+
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s)
+  {
+    auto reducer = loco::must_cast<const REDUCER_NODE *>(node);
+    s.args().append("keep_dims", reducer->keep_dims() ? "true" : "false");
+  }
+};
+
+} // namespace luci
+
+namespace luci
+{
+
+class CircleAbsSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleAddSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleAddNSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *node);
+};
+
+class CircleArgMaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleArgMinSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleAveragePool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBatchMatMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBatchToSpaceNDSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleBCQFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBCQGatherSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleBidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCastSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCeilSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleConcatenationSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleConstSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  void update_status(locop::NodeSummary &s);
+};
+
+class CircleConv2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleCosSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleCustomSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDepthToSpaceSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDepthwiseConv2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleDequantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleEluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleExpSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleExpandDimsSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleFakeQuantSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleFillSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleFloorSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleFloorDivSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleFloorModSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleFullyConnectedSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGatherSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleGatherNdSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleGreaterSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleGreaterEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleIfSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleInstanceNormSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleL2NormalizeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleL2Pool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLeakyReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLessSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLessEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLocalResponseNormalizationSummaryBuilder final
+  : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleLogSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogicalAndSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLogicalNotSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogicalOrSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleLogisticSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleLogSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMatrixDiagSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMatrixSetDiagSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleMaximumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleMaxPool2DSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleMeanSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleMean>
+{
+};
+
+class CircleMinimumSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleMirrorPadSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleMulSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleNegSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV4SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNonMaxSuppressionV5SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleNotEqualSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleOneHotSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CirclePackSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CirclePadSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CirclePadV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CirclePowSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CirclePReluSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleQuantizeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleRangeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleRankSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleReduceAnySummaryBuilder final
+  : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceAny>
+{
+};
+
+class CircleReduceMaxSummaryBuilder final
+  : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMax>
+{
+};
+
+class CircleReduceMinSummaryBuilder final
+  : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceMin>
+{
+};
+
+class CircleReduceProdSummaryBuilder final
+  : public CircleNodeWithReducerSummaryBuilder<luci::CircleReduceProd>
+{
+};
+
+class CircleReluSummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleRelu6SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleReluN1To1SummaryBuilder final : public CircleNodeWithFEATURESSummaryBuilder
+{
+};
+
+class CircleReshapeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void update_status(locop::NodeSummary &s);
+};
+
+class CircleResizeBilinearSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleResizeNearestNeighborSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleReverseSequenceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleReverseV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleRoundSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleRsqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleScatterNdSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSegmentSumSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSelectSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSelectV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleShapeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSinSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSliceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSoftmaxSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSpaceToBatchNDSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleSpaceToDepthSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSparseToDenseSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSplitSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSplitVSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSqrtSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSquareSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleSquaredDifferenceSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleSqueezeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleStridedSliceSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleSubSummaryBuilder final : public CircleNodeWithXYSummaryBuilder
+{
+};
+
+class CircleSumSummaryBuilder final : public CircleNodeWithReducerSummaryBuilder<luci::CircleSum>
+{
+};
+
+class CircleSVDFSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleTanhSummaryBuilder final : public CircleNodeWithXSummaryBuilder
+{
+};
+
+class CircleTileSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTopKV2SummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTransposeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleTransposeConvSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  bool validate(const luci::CircleNode *node);
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUnidirectionalSequenceLSTMSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUniqueSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+private:
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleUnpackSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleWhereSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleWhileSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *node);
+  void build_attributes(const luci::CircleNode *node, locop::NodeSummary &s);
+};
+
+class CircleZerosLikeSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleBidirectionalSequenceLSTMOutSummaryBuilder final
+  : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleCustomOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleIfOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleInputSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV4OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleNonMaxSuppressionV5OutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleOutputSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleOutputDummySummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleOutputExcludeSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleSplitOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleSplitVOutSummaryBuilder final : public CircleNodeWithINPUTSummaryBuilder
+{
+};
+
+class CircleTopKV2OutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleUniqueOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleUnpackOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+class CircleVariableSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+};
+
+class CircleWhileOutSummaryBuilder final : public CircleNodeSummaryBuilder
+{
+private:
+  std::vector<std::string> get_input_names(const luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_LOGEX_CIRCLE_NODE_SUMMARY_BUILDERS__
index 0588ed79e7c61cc9f1bb1c9140df71f40078de7c..d3b2170b0ae405186a590402fb2b9008e5257ee8 100644 (file)
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "CircleNodeSummaryBuilder.h"
 #include "luci/FormattedGraph.h"
 
 #include <luci/IR/CircleDialect.h>
 #include <sstream>
 #include <vector>
 
-using namespace luci;
-/**
- * @brief dump std::vector<int64_t> values to stream
- */
-std::ostream &operator<<(std::ostream &os, const std::vector<int64_t> &vi64)
-{
-  for (auto vi : vi64)
-  {
-    os << vi << " ";
-  }
-  return os;
-}
-
-// For TF lite
-namespace
-{
-
-const char *to_str(loco::DataType type)
-{
-  switch (type)
-  {
-    case loco::DataType::U8:
-      return "UINT8";
-    case loco::DataType::U16:
-      return "UINT16";
-    case loco::DataType::U32:
-      return "UINT32";
-    case loco::DataType::U64:
-      return "UINT64";
-
-    case loco::DataType::S8:
-      return "INT8";
-    case loco::DataType::S16:
-      return "INT16";
-    case loco::DataType::S32:
-      return "INT32";
-    case loco::DataType::S64:
-      return "INT64";
-
-    case loco::DataType::FLOAT16:
-      return "FLOAT16";
-    case loco::DataType::FLOAT32:
-      return "FLOAT32";
-    case loco::DataType::FLOAT64:
-      return "FLOAT64";
-
-    case loco::DataType::BOOL:
-      return "BOOL";
-
-    default:
-      return "Error";
-  }
-}
-
-const char *to_str(bool value) { return value ? "true" : "false"; }
-
-const char *to_str(luci::FusedActFunc fused)
-{
-  switch (fused)
-  {
-    case luci::FusedActFunc::NONE:
-      return "NONE";
-    case luci::FusedActFunc::RELU:
-      return "RELU";
-    case luci::FusedActFunc::RELU_N1_TO_1:
-      return "RELU_N1_TO_1";
-    case luci::FusedActFunc::RELU6:
-      return "RELU6";
-    case luci::FusedActFunc::TANH:
-      return "TANH";
-    case luci::FusedActFunc::SIGN_BIT:
-      return "SIGN_BIT";
-    default:
-      return "Error";
-  }
-}
-
-const char *to_str(luci::Padding padding)
-{
-  switch (padding)
-  {
-    case luci::Padding::SAME:
-      return "SAME";
-    case luci::Padding::VALID:
-      return "VALID";
-    default:
-      return "Error";
-  }
-}
-
-const char *to_str(luci::MirrorPadMode mode)
-{
-  switch (mode)
-  {
-    case luci::MirrorPadMode::REFLECT:
-      return "REFLECT";
-    case luci::MirrorPadMode::SYMMETRIC:
-      return "SYMMETRIC";
-    default:
-      return "Error";
-  }
-}
-
-std::string to_str(const luci::Stride *stride)
-{
-  return pepper::str(stride->h(), ",", stride->w());
-}
-
-std::string to_str(const luci::Filter *filter)
-{
-  return pepper::str(filter->h(), ",", filter->w());
-}
-
-std::string circle_opname(uint32_t opnum)
-{
-  static const std::string prefix{"circle."};
-
-  switch (static_cast<luci::CircleOpcode>(opnum))
-  {
-#define CIRCLE_NODE(OPCODE, CLASS) \
-  case luci::CircleOpcode::OPCODE: \
-    return prefix + #OPCODE;
-#define CIRCLE_VNODE CIRCLE_NODE
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-    default:
-      break;
-  };
-
-  return prefix + "Invalid";
-}
-
-// CircleNodeSummaryBuilder with default implementation
-class CircleNodeSummaryBuilderBase : public locop::NodeSummaryBuilder
-{
-public:
-  CircleNodeSummaryBuilderBase(const locop::SymbolTable *tbl) : _tbl{tbl}
-  {
-    // DO NOTHING
-  }
-
-public:
-  bool build(const loco::Node *, locop::NodeSummary &s) const final;
-
-protected:
-#define CIRCLE_NODE(OPCODE, CLASS) \
-  virtual bool summary(const CLASS *, locop::NodeSummary &) const { return false; }
-#define CIRCLE_VNODE CIRCLE_NODE
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-
-protected:
-  const locop::SymbolTable *tbl(void) const { return _tbl; }
-
-private:
-  const locop::SymbolTable *_tbl;
-};
-
-template <class CIRCLENODE>
-bool use_x(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  s.args().append("x", tbl->lookup(node->x()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-template <class CIRCLENODE>
-bool use_input(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-template <class CIRCLENODE>
-bool use_features(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  s.args().append("features", tbl->lookup(node->features()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-template <class CIRCLENODE>
-bool use_xy(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  s.args().append("x", tbl->lookup(node->x()));
-  s.args().append("y", tbl->lookup(node->y()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-template <class CIRCLENODE>
-bool use_xy_act(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("x", tbl->lookup(node->x()));
-  s.args().append("y", tbl->lookup(node->y()));
-  s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-template <class CIRCLENODE>
-bool use_reducer(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("reduction_indices", tbl->lookup(node->reduction_indices()));
-  s.args().append("keep_dims", node->keep_dims() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-template <class CIRCLENODE>
-bool use_ido(const locop::SymbolTable *tbl, const CIRCLENODE *node, locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("dimension", tbl->lookup(node->dimension()));
-  s.args().append("output_type", to_str(node->output_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAddN *node,
-                  locop::NodeSummary &s)
-{
-  for (uint32_t i = 0; i < node->arity(); ++i)
-    s.args().append("inputs", tbl->lookup(node->inputs(i)));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleAveragePool2D *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("value", tbl->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchMatMul *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("x", tbl->lookup(node->x()));
-  s.args().append("y", tbl->lookup(node->y()));
-  s.args().append("adj_x", to_str(node->adj_x()));
-  s.args().append("adj_y", to_str(node->adj_y()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBatchToSpaceND *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("block_shape", tbl->lookup(node->block_shape()));
-  s.args().append("crops", tbl->lookup(node->crops()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBidirectionalSequenceLSTM *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-
-  s.args().append("fw_input_to_input_weights", tbl->lookup(node->fw_input_to_input_weights()));
-  s.args().append("fw_input_to_forget_weights", tbl->lookup(node->fw_input_to_forget_weights()));
-  s.args().append("fw_input_to_cell_weights", tbl->lookup(node->fw_input_to_cell_weights()));
-  s.args().append("fw_input_to_output_weights", tbl->lookup(node->fw_input_to_output_weights()));
-
-  s.args().append("fw_recurrent_to_input_weights",
-                  tbl->lookup(node->fw_recurrent_to_input_weights()));
-  s.args().append("fw_recurrent_to_forget_weights",
-                  tbl->lookup(node->fw_recurrent_to_forget_weights()));
-  s.args().append("fw_recurrent_to_cell_weights",
-                  tbl->lookup(node->fw_recurrent_to_cell_weights()));
-  s.args().append("fw_recurrent_to_output_weights",
-                  tbl->lookup(node->fw_recurrent_to_output_weights()));
-
-  s.args().append("fw_cell_to_input_weights", tbl->lookup(node->fw_cell_to_input_weights()));
-  s.args().append("fw_cell_to_forget_weights", tbl->lookup(node->fw_cell_to_forget_weights()));
-  s.args().append("fw_cell_to_output_weights", tbl->lookup(node->fw_cell_to_output_weights()));
-
-  s.args().append("fw_input_gate_bias", tbl->lookup(node->fw_input_gate_bias()));
-  s.args().append("fw_forget_gate_bias", tbl->lookup(node->fw_forget_gate_bias()));
-  s.args().append("fw_cell_gate_bias", tbl->lookup(node->fw_cell_gate_bias()));
-  s.args().append("fw_output_gate_bias", tbl->lookup(node->fw_output_gate_bias()));
-
-  s.args().append("fw_projection_weights", tbl->lookup(node->fw_projection_weights()));
-  s.args().append("fw_projection_bias", tbl->lookup(node->fw_projection_bias()));
-
-  s.args().append("bw_input_to_input_weights", tbl->lookup(node->bw_input_to_input_weights()));
-  s.args().append("bw_input_to_forget_weights", tbl->lookup(node->bw_input_to_forget_weights()));
-  s.args().append("bw_input_to_cell_weights", tbl->lookup(node->bw_input_to_cell_weights()));
-  s.args().append("bw_input_to_output_weights", tbl->lookup(node->bw_input_to_output_weights()));
-
-  s.args().append("bw_recurrent_to_input_weights",
-                  tbl->lookup(node->bw_recurrent_to_input_weights()));
-  s.args().append("bw_recurrent_to_forget_weights",
-                  tbl->lookup(node->bw_recurrent_to_forget_weights()));
-  s.args().append("bw_recurrent_to_cell_weights",
-                  tbl->lookup(node->bw_recurrent_to_cell_weights()));
-  s.args().append("bw_recurrent_to_output_weights",
-                  tbl->lookup(node->bw_recurrent_to_output_weights()));
-
-  s.args().append("bw_cell_to_input_weights", tbl->lookup(node->bw_cell_to_input_weights()));
-  s.args().append("bw_cell_to_forget_weights", tbl->lookup(node->bw_cell_to_forget_weights()));
-  s.args().append("bw_cell_to_output_weights", tbl->lookup(node->bw_cell_to_output_weights()));
-
-  s.args().append("bw_input_gate_bias", tbl->lookup(node->bw_input_gate_bias()));
-  s.args().append("bw_forget_gate_bias", tbl->lookup(node->bw_forget_gate_bias()));
-  s.args().append("bw_cell_gate_bias", tbl->lookup(node->bw_cell_gate_bias()));
-  s.args().append("bw_output_gate_bias", tbl->lookup(node->bw_output_gate_bias()));
-
-  s.args().append("bw_projection_weights", tbl->lookup(node->bw_projection_weights()));
-  s.args().append("bw_projection_bias", tbl->lookup(node->bw_projection_bias()));
-
-  s.args().append("fw_activation_state", tbl->lookup(node->fw_activation_state()));
-  s.args().append("fw_cell_state", tbl->lookup(node->fw_cell_state()));
-  s.args().append("bw_activation_state", tbl->lookup(node->bw_activation_state()));
-  s.args().append("bw_cell_state", tbl->lookup(node->bw_cell_state()));
-
-  s.args().append("auxillary_input", tbl->lookup(node->auxillary_input()));
-  s.args().append("fw_auxillary_input_to_input_weights",
-                  tbl->lookup(node->fw_auxillary_input_to_input_weights()));
-  s.args().append("fw_auxillary_input_to_forget_weights",
-                  tbl->lookup(node->fw_auxillary_input_to_forget_weights()));
-  s.args().append("fw_auxillary_input_to_cell_weights",
-                  tbl->lookup(node->fw_auxillary_input_to_cell_weights()));
-  s.args().append("fw_auxillary_input_to_output_weights",
-                  tbl->lookup(node->fw_auxillary_input_to_output_weights()));
-  s.args().append("bw_auxillary_input_to_input_weights",
-                  tbl->lookup(node->bw_auxillary_input_to_input_weights()));
-  s.args().append("bw_auxillary_input_to_forget_weights",
-                  tbl->lookup(node->bw_auxillary_input_to_forget_weights()));
-  s.args().append("bw_auxillary_input_to_cell_weights",
-                  tbl->lookup(node->bw_auxillary_input_to_cell_weights()));
-  s.args().append("bw_auxillary_input_to_output_weights",
-                  tbl->lookup(node->bw_auxillary_input_to_output_weights()));
-
-  s.args().append("cell_clip", to_str(node->cell_clip()));
-  s.args().append("proj_clip", to_str(node->proj_clip()));
-  s.args().append("merge_outputs", to_str(node->merge_outputs()));
-  s.args().append("time_major", to_str(node->time_major()));
-  s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCast *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("x", tbl->lookup(node->x()));
-  s.args().append("in_data_type", to_str(node->in_data_type()));
-  s.args().append("out_data_type", to_str(node->out_data_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConcatenation *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-    s.args().append("values", tbl->lookup(node->values(i)));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleConv2D *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("filter", tbl->lookup(node->filter()));
-  s.args().append("bias", tbl->lookup(node->bias()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("dilation(h,w)", to_str(node->dilation()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleCustom *node,
-                  locop::NodeSummary &s)
-{
-  for (uint32_t i = 0; i < node->numInputs(); i++)
-  {
-    s.args().append("input" + std::to_string(i), tbl->lookup(node->inputs(i)));
-  }
-  s.args().append("custom_code", node->custom_code());
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthToSpace *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("block_size", std::to_string(node->block_size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleDepthwiseConv2D *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("filter", tbl->lookup(node->filter()));
-  s.args().append("bias", tbl->lookup(node->bias()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("dilation(h,w)", to_str(node->dilation()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("depthMultiplier", std::to_string(node->depthMultiplier()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleExpandDims *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("axis", tbl->lookup(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFakeQuant *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("inputs", tbl->lookup(node->inputs()));
-  s.args().append("min", pepper::str(node->min()));
-  s.args().append("max", pepper::str(node->max()));
-  s.args().append("num_bits", pepper::str(node->num_bits()));
-  s.args().append("narrow_range", node->narrow_range() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFill *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("dims", tbl->lookup(node->dims()));
-  s.args().append("value", tbl->lookup(node->value()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleFullyConnected *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("weights", tbl->lookup(node->weights()));
-  s.args().append("bias", tbl->lookup(node->bias()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGather *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("params", tbl->lookup(node->params()));
-  s.args().append("indices", tbl->lookup(node->indices()));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleGatherNd *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("params", tbl->lookup(node->params()));
-  s.args().append("indices", tbl->lookup(node->indices()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleIf *node, locop::NodeSummary &s)
-{
-  s.args().append("cond", tbl->lookup(node->cond()));
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-    s.args().append("input", tbl->lookup(node->input(i)));
-
-  if (node->then_graph() != nullptr)
-    s.args().append("then_graph", node->then_graph()->name());
-  else
-    s.args().append("then_branch", pepper::str(node->then_branch()));
-
-  if (node->else_graph() != nullptr)
-    s.args().append("else_graph", node->else_graph()->name());
-  else
-    s.args().append("else_branch", pepper::str(node->else_branch()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Normalize *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("x", tbl->lookup(node->x()));
-  s.args().append("fused_activation_function", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleL2Pool2D *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("value", tbl->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLeakyRelu *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("features", tbl->lookup(node->features()));
-  s.args().append("alpha", std::to_string(node->alpha()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLocalResponseNormalization *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("radius", pepper::str(node->radius()));
-  s.args().append("bias", pepper::str(node->bias()));
-  s.args().append("alpha", pepper::str(node->alpha()));
-  s.args().append("beta", pepper::str(node->beta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleLogSoftmax *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("logits", tbl->lookup(node->logits()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixDiag *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("diagonal", tbl->lookup(node->diagonal()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMatrixSetDiag *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("diagonal", tbl->lookup(node->diagonal()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMaxPool2D *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("value", tbl->lookup(node->value()));
-  s.args().append("filter(h,w)", to_str(node->filter()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleMirrorPad *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("paddings", tbl->lookup(node->paddings()));
-  s.args().append("mode", to_str(node->mode()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV4 *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("boxes", tbl->lookup(node->boxes()));
-  s.args().append("scores", tbl->lookup(node->scores()));
-  s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
-  s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
-  s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleNonMaxSuppressionV5 *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("boxes", tbl->lookup(node->boxes()));
-  s.args().append("scores", tbl->lookup(node->scores()));
-  s.args().append("max_output_size", tbl->lookup(node->max_output_size()));
-  s.args().append("iou_threshold", tbl->lookup(node->iou_threshold()));
-  s.args().append("score_threshold", tbl->lookup(node->score_threshold()));
-  s.args().append("soft_nms_sigma", tbl->lookup(node->soft_nms_sigma()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOneHot *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("indices", tbl->lookup(node->indices()));
-  s.args().append("depth", tbl->lookup(node->depth()));
-  s.args().append("on_value", tbl->lookup(node->on_value()));
-  s.args().append("off_value", tbl->lookup(node->off_value()));
-  s.args().append("axis", pepper::str(node->axis()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePack *node,
-                  locop::NodeSummary &s)
-{
-  for (uint32_t i = 0; i < node->values_count(); ++i)
-    s.args().append("values", tbl->lookup(node->values(i)));
-  s.args().append("values_count", pepper::str(node->values_count()));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePad *node, locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("paddings", tbl->lookup(node->paddings()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePadV2 *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("paddings", tbl->lookup(node->paddings()));
-  s.args().append("constant_values", tbl->lookup(node->constant_values()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CirclePRelu *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("alpha", tbl->lookup(node->alpha()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleRange *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("start", tbl->lookup(node->start()));
-  s.args().append("limit", tbl->lookup(node->limit()));
-  s.args().append("delta", tbl->lookup(node->delta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReshape *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("tensor", tbl->lookup(node->tensor()));
-  s.args().append("shape", tbl->lookup(node->shape()));
-  // TODO Show newShape info
-  s.state(locop::NodeSummary::State::PartiallyKnown);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeBilinear *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("size", tbl->lookup(node->size()));
-  s.args().append("align_corners", node->align_corners() ? "true" : "false");
-  s.args().append("half_pixel_centers", node->half_pixel_centers() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleResizeNearestNeighbor *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("size", tbl->lookup(node->size()));
-  s.args().append("align_corners", node->align_corners() ? "true" : "false");
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseSequence *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("seq_lengths", tbl->lookup(node->seq_lengths()));
-  s.args().append("seq_axis", std::to_string(node->seq_axis()));
-  s.args().append("batch_axis", std::to_string(node->batch_axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleReverseV2 *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("tensor", tbl->lookup(node->tensor()));
-  s.args().append("axis", tbl->lookup(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleScatterNd *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("indices", tbl->lookup(node->indices()));
-  s.args().append("updates", tbl->lookup(node->updates()));
-  s.args().append("shape", tbl->lookup(node->shape()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSegmentSum *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("segment_ids", tbl->lookup(node->segment_ids()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelect *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("condition", tbl->lookup(node->condition()));
-  s.args().append("t", tbl->lookup(node->t()));
-  s.args().append("e", tbl->lookup(node->e()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSelectV2 *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("condition", tbl->lookup(node->condition()));
-  s.args().append("t", tbl->lookup(node->t()));
-  s.args().append("e", tbl->lookup(node->e()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleShape *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("out_type", to_str(node->out_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSlice *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("begin", tbl->lookup(node->begin()));
-  s.args().append("size", tbl->lookup(node->size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSoftmax *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("logits", tbl->lookup(node->logits()));
-  s.args().append("beta", pepper::str(node->beta()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToBatchND *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("block_shape", tbl->lookup(node->block_shape()));
-  s.args().append("paddings", tbl->lookup(node->paddings()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSpaceToDepth *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("block_size", pepper::str(node->block_size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSparseToDense *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("indices", tbl->lookup(node->indices()));
-  s.args().append("output_shape", tbl->lookup(node->output_shape()));
-  s.args().append("values", tbl->lookup(node->values()));
-  s.args().append("default_value", tbl->lookup(node->default_value()));
-  s.args().append("Validate_indices", pepper::str(node->validate_indices()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplit *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("split_dim", tbl->lookup(node->split_dim()));
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("num_split", pepper::str(node->num_split()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSplitV *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("size_splits", tbl->lookup(node->size_splits()));
-  s.args().append("split_dim", tbl->lookup(node->split_dim()));
-  s.args().append("num_split", pepper::str(node->num_split()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleSqueeze *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-
-  std::stringstream ss{"("};
-  for (size_t i = 0; i < node->squeeze_dims().size(); ++i)
-  {
-    if (i != 0)
-      ss << ", ";
-    ss << node->squeeze_dims()[i];
-  }
-  ss << ")";
-  s.args().append("squeeze_dims", ss.str());
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleStridedSlice *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("begin", tbl->lookup(node->begin()));
-  s.args().append("end", tbl->lookup(node->end()));
-  s.args().append("strides", tbl->lookup(node->strides()));
-  s.args().append("begin_mask", pepper::str(node->begin_mask()));
-  s.args().append("end_mask", pepper::str(node->end_mask()));
-  s.args().append("ellipsis_mask", pepper::str(node->ellipsis_mask()));
-  s.args().append("new_axis_mask", pepper::str(node->new_axis_mask()));
-  s.args().append("shrink_axis_mask", pepper::str(node->shrink_axis_mask()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTile *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("multiples", tbl->lookup(node->multiples()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2 *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("k", tbl->lookup(node->k()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTranspose *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("a", tbl->lookup(node->a()));
-  s.args().append("perm", tbl->lookup(node->perm()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTransposeConv *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->padding() != luci::Padding::UNDEFINED);
-
-  s.args().append("inputSizes", tbl->lookup(node->inputSizes()));
-  s.args().append("filter", tbl->lookup(node->filter()));
-  s.args().append("outBackprop", tbl->lookup(node->outBackprop()));
-  s.args().append("bias", tbl->lookup(node->bias()));
-  s.args().append("stride(h,w)", to_str(node->stride()));
-  s.args().append("padding", to_str(node->padding()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnidirectionalSequenceLSTM *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-
-  s.args().append("input_to_input_weights", tbl->lookup(node->input_to_input_weights()));
-  s.args().append("input_to_forget_weights", tbl->lookup(node->input_to_forget_weights()));
-  s.args().append("input_to_cell_weights", tbl->lookup(node->input_to_cell_weights()));
-  s.args().append("input_to_output_weights", tbl->lookup(node->input_to_output_weights()));
-
-  s.args().append("recurrent_to_input_weights", tbl->lookup(node->recurrent_to_input_weights()));
-  s.args().append("recurrent_to_forget_weights", tbl->lookup(node->recurrent_to_forget_weights()));
-  s.args().append("recurrent_to_cell_weights", tbl->lookup(node->recurrent_to_cell_weights()));
-  s.args().append("recurrent_to_output_weights", tbl->lookup(node->recurrent_to_output_weights()));
-
-  s.args().append("cell_to_input_weights", tbl->lookup(node->cell_to_input_weights()));
-  s.args().append("cell_to_forget_weights", tbl->lookup(node->cell_to_forget_weights()));
-  s.args().append("cell_to_output_weights", tbl->lookup(node->cell_to_output_weights()));
-
-  s.args().append("input_gate_bias", tbl->lookup(node->input_gate_bias()));
-  s.args().append("forget_gate_bias", tbl->lookup(node->forget_gate_bias()));
-  s.args().append("cell_gate_bias", tbl->lookup(node->cell_gate_bias()));
-  s.args().append("output_gate_bias", tbl->lookup(node->output_gate_bias()));
-
-  s.args().append("projection_weights", tbl->lookup(node->projection_weights()));
-  s.args().append("projection_bias", tbl->lookup(node->projection_bias()));
-
-  s.args().append("activation_state", tbl->lookup(node->activation_state()));
-  s.args().append("cell_state", tbl->lookup(node->cell_state()));
-
-  s.args().append("input_layer_norm_coefficients",
-                  tbl->lookup(node->input_layer_norm_coefficients()));
-  s.args().append("forget_layer_norm_coefficients",
-                  tbl->lookup(node->forget_layer_norm_coefficients()));
-  s.args().append("cell_layer_norm_coefficients",
-                  tbl->lookup(node->cell_layer_norm_coefficients()));
-  s.args().append("output_layer_norm_coefficients",
-                  tbl->lookup(node->output_layer_norm_coefficients()));
-
-  s.args().append("cell_clip", to_str(node->cell_clip()));
-  s.args().append("proj_clip", to_str(node->proj_clip()));
-  s.args().append("time_major", to_str(node->time_major()));
-  s.args().append("asymmetric_quantize_inputs", to_str(node->asymmetric_quantize_inputs()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnique *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("idx_out_type", to_str(node->idx_out_type()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpack *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("value", tbl->lookup(node->value()));
-  s.args().append("num", pepper::str(node->num()));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhere *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("condition", tbl->lookup(node->condition()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhile *node,
-                  locop::NodeSummary &s)
-{
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-    s.args().append("input", tbl->lookup(node->input(i)));
-
-  if (node->cond_graph() != nullptr)
-    s.args().append("cond_graph", node->cond_graph()->name());
-  else
-    s.args().append("cond_branch", pepper::str(node->cond_branch()));
-
-  if (node->body_graph() != nullptr)
-    s.args().append("body_graph", node->body_graph()->name());
-  else
-    s.args().append("body_branch", pepper::str(node->body_branch()));
-
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleTopKV2Out *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("topkv2", tbl->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUniqueOut *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("unique", tbl->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleUnpackOut *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("unpack", tbl->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleWhileOut *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("while", tbl->lookup(node->input()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleOutput *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("from", tbl->lookup(node->from()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *, const luci::CircleOutputDummy *,
-                  locop::NodeSummary &s)
-{
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *, const luci::CircleOutputExclude *,
-                  locop::NodeSummary &s)
-{
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQFullyConnected *node,
-                  locop::NodeSummary &s)
-{
-  assert(node->fusedActivationFunction() != luci::FusedActFunc::UNDEFINED);
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("weights_scales", tbl->lookup(node->weights_scales()));
-  s.args().append("weights_binary", tbl->lookup(node->weights_binary()));
-  s.args().append("bias", tbl->lookup(node->bias()));
-  s.args().append("weights_clusters", tbl->lookup(node->weights_clusters()));
-  s.args().append("fused", to_str(node->fusedActivationFunction()));
-  s.args().append("weights_hidden_size", pepper::str(node->weights_hidden_size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleBCQGather *node,
-                  locop::NodeSummary &s)
-{
-  s.args().append("input_scales", tbl->lookup(node->input_scales()));
-  s.args().append("input_binary", tbl->lookup(node->input_binary()));
-  s.args().append("indices", tbl->lookup(node->indices()));
-  s.args().append("input_clusters", tbl->lookup(node->input_clusters()));
-  s.args().append("axis", pepper::str(node->axis()));
-  s.args().append("input_hidden_size", pepper::str(node->input_hidden_size()));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool summary_node(const locop::SymbolTable *tbl, const luci::CircleInstanceNorm *node,
-                  locop::NodeSummary &s)
-{
-  auto fused = node->fusedActivationFunction();
-  assert(fused != luci::FusedActFunc::UNDEFINED);
-
-  s.args().append("input", tbl->lookup(node->input()));
-  s.args().append("gamma", tbl->lookup(node->gamma()));
-  s.args().append("beta", tbl->lookup(node->beta()));
-  s.args().append("epsilon", pepper::str(node->epsilon()));
-  s.args().append("fused_activation_function", to_str(fused));
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-// SummaryBuilderLet type
-enum class SB
-{
-  ABC,
-  DEF,
-  GHIJ,
-  KLMN,
-  OPQR,
-  STUV,
-  WXYZ,
-  CIRC, // circle only
-  VIRT, // virtual
-};
-
-template <SB sb> class SummaryBuilderLet;
-
-#define IMPLEMENT(CLASS) bool summary(const CLASS *, locop::NodeSummary &) const final;
-
-template <> class SummaryBuilderLet<SB::ABC> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleAbs)
-  IMPLEMENT(luci::CircleAdd)
-  IMPLEMENT(luci::CircleAddN)
-  IMPLEMENT(luci::CircleArgMax)
-  IMPLEMENT(luci::CircleArgMin)
-  IMPLEMENT(luci::CircleAveragePool2D)
-  IMPLEMENT(luci::CircleBatchMatMul)
-  IMPLEMENT(luci::CircleBatchToSpaceND)
-  IMPLEMENT(luci::CircleBidirectionalSequenceLSTM)
-  IMPLEMENT(luci::CircleCast)
-  IMPLEMENT(luci::CircleCeil)
-  IMPLEMENT(luci::CircleConcatenation)
-  IMPLEMENT(luci::CircleConst)
-  IMPLEMENT(luci::CircleConv2D)
-  IMPLEMENT(luci::CircleCos)
-  IMPLEMENT(luci::CircleCustom)
-};
-
-template <> class SummaryBuilderLet<SB::DEF> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleDepthToSpace)
-  IMPLEMENT(luci::CircleDepthwiseConv2D)
-  IMPLEMENT(luci::CircleDequantize)
-  IMPLEMENT(luci::CircleDiv)
-  IMPLEMENT(luci::CircleElu)
-  IMPLEMENT(luci::CircleEqual)
-  IMPLEMENT(luci::CircleExp)
-  IMPLEMENT(luci::CircleExpandDims)
-  IMPLEMENT(luci::CircleFakeQuant)
-  IMPLEMENT(luci::CircleFill)
-  IMPLEMENT(luci::CircleFloor)
-  IMPLEMENT(luci::CircleFloorDiv)
-  IMPLEMENT(luci::CircleFloorMod)
-  IMPLEMENT(luci::CircleFullyConnected)
-};
-
-template <> class SummaryBuilderLet<SB::GHIJ> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleGather)
-  IMPLEMENT(luci::CircleGatherNd)
-  IMPLEMENT(luci::CircleGreater)
-  IMPLEMENT(luci::CircleGreaterEqual)
-  IMPLEMENT(luci::CircleIf)
-};
-
-template <> class SummaryBuilderLet<SB::KLMN> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleL2Normalize)
-  IMPLEMENT(luci::CircleL2Pool2D)
-  IMPLEMENT(luci::CircleLeakyRelu)
-  IMPLEMENT(luci::CircleLess)
-  IMPLEMENT(luci::CircleLessEqual)
-  IMPLEMENT(luci::CircleLocalResponseNormalization)
-  IMPLEMENT(luci::CircleLog)
-  IMPLEMENT(luci::CircleLogicalAnd)
-  IMPLEMENT(luci::CircleLogicalNot)
-  IMPLEMENT(luci::CircleLogicalOr)
-  IMPLEMENT(luci::CircleLogistic)
-  IMPLEMENT(luci::CircleLogSoftmax)
-  IMPLEMENT(luci::CircleMatrixDiag)
-  IMPLEMENT(luci::CircleMatrixSetDiag)
-  IMPLEMENT(luci::CircleMaximum)
-  IMPLEMENT(luci::CircleMaxPool2D)
-  IMPLEMENT(luci::CircleMean)
-  IMPLEMENT(luci::CircleMinimum)
-  IMPLEMENT(luci::CircleMirrorPad)
-  IMPLEMENT(luci::CircleMul)
-  IMPLEMENT(luci::CircleNeg)
-  IMPLEMENT(luci::CircleNonMaxSuppressionV4)
-  IMPLEMENT(luci::CircleNonMaxSuppressionV5)
-  IMPLEMENT(luci::CircleNotEqual)
-};
-
-template <> class SummaryBuilderLet<SB::OPQR> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleOneHot)
-  IMPLEMENT(luci::CirclePack)
-  IMPLEMENT(luci::CirclePad)
-  IMPLEMENT(luci::CirclePadV2)
-  IMPLEMENT(luci::CirclePow)
-  IMPLEMENT(luci::CirclePRelu)
-  IMPLEMENT(luci::CircleQuantize)
-  IMPLEMENT(luci::CircleRange)
-  IMPLEMENT(luci::CircleRank)
-  IMPLEMENT(luci::CircleReduceAny)
-  IMPLEMENT(luci::CircleReduceMax)
-  IMPLEMENT(luci::CircleReduceMin)
-  IMPLEMENT(luci::CircleReduceProd)
-  IMPLEMENT(luci::CircleRelu)
-  IMPLEMENT(luci::CircleRelu6)
-  IMPLEMENT(luci::CircleReluN1To1)
-  IMPLEMENT(luci::CircleReshape)
-  IMPLEMENT(luci::CircleResizeBilinear)
-  IMPLEMENT(luci::CircleResizeNearestNeighbor)
-  IMPLEMENT(luci::CircleReverseSequence)
-  IMPLEMENT(luci::CircleReverseV2)
-  IMPLEMENT(luci::CircleRound)
-  IMPLEMENT(luci::CircleRsqrt)
-};
-
-template <> class SummaryBuilderLet<SB::STUV> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleScatterNd)
-  IMPLEMENT(luci::CircleSegmentSum)
-  IMPLEMENT(luci::CircleSelect)
-  IMPLEMENT(luci::CircleSelectV2)
-  IMPLEMENT(luci::CircleShape)
-  IMPLEMENT(luci::CircleSin)
-  IMPLEMENT(luci::CircleSlice)
-  IMPLEMENT(luci::CircleSoftmax)
-  IMPLEMENT(luci::CircleSpaceToBatchND)
-  IMPLEMENT(luci::CircleSpaceToDepth)
-  IMPLEMENT(luci::CircleSparseToDense)
-  IMPLEMENT(luci::CircleSplit)
-  IMPLEMENT(luci::CircleSplitV)
-  IMPLEMENT(luci::CircleSqrt)
-  IMPLEMENT(luci::CircleSquare)
-  IMPLEMENT(luci::CircleSquaredDifference)
-  IMPLEMENT(luci::CircleSqueeze)
-  IMPLEMENT(luci::CircleStridedSlice)
-  IMPLEMENT(luci::CircleSub)
-  IMPLEMENT(luci::CircleSum)
-  IMPLEMENT(luci::CircleTanh)
-  IMPLEMENT(luci::CircleTile)
-  IMPLEMENT(luci::CircleTopKV2)
-  IMPLEMENT(luci::CircleTranspose)
-  IMPLEMENT(luci::CircleTransposeConv)
-  IMPLEMENT(luci::CircleUnidirectionalSequenceLSTM)
-  IMPLEMENT(luci::CircleUnique)
-  IMPLEMENT(luci::CircleUnpack)
-};
-
-template <> class SummaryBuilderLet<SB::WXYZ> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleWhere)
-  IMPLEMENT(luci::CircleWhile)
-  IMPLEMENT(luci::CircleZerosLike)
-};
-
-template <> class SummaryBuilderLet<SB::CIRC> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleBCQFullyConnected)
-  IMPLEMENT(luci::CircleBCQGather)
-  IMPLEMENT(luci::CircleInstanceNorm)
-};
-
-template <> class SummaryBuilderLet<SB::VIRT> final : public CircleNodeSummaryBuilderBase
-{
-public:
-  SummaryBuilderLet(const locop::SymbolTable *tbl) : CircleNodeSummaryBuilderBase(tbl)
-  {
-    // DO NOTHING
-  }
-
-private:
-  IMPLEMENT(luci::CircleInput)
-  IMPLEMENT(luci::CircleOutput)
-  IMPLEMENT(luci::CircleCustomOut)
-  IMPLEMENT(luci::CircleIfOut)
-  IMPLEMENT(luci::CircleNonMaxSuppressionV4Out)
-  IMPLEMENT(luci::CircleNonMaxSuppressionV5Out)
-  IMPLEMENT(luci::CircleOutputDummy)
-  IMPLEMENT(luci::CircleOutputExclude)
-  IMPLEMENT(luci::CircleSplitOut)
-  IMPLEMENT(luci::CircleSplitVOut)
-  IMPLEMENT(luci::CircleTopKV2Out)
-  IMPLEMENT(luci::CircleUniqueOut)
-  IMPLEMENT(luci::CircleUnpackOut)
-  IMPLEMENT(luci::CircleWhileOut)
-};
-
-#undef IMPLEMENT
-
-bool CircleNodeSummaryBuilderBase::build(const loco::Node *node, locop::NodeSummary &s) const
-{
-  if (node->dialect() != luci::CircleDialect::get())
-    return false;
-
-  auto ptr_to_str = [](const void *ptr) {
-    std::stringstream ss;
-    ss << ptr;
-    return ss.str();
-  };
-
-  auto add_comment = [&]() {
-    auto cnode = loco::must_cast<const luci::CircleNode *>(node);
-    s.opname(circle_opname(node->opnum()));
-    s.comments().append("[" + cnode->name() + "] = " + ptr_to_str(node));
-  };
-
-#define CIRCLE_NODE(OPCODE, CLASS)                     \
-  if (dynamic_cast<const CLASS *>(node))               \
-  {                                                    \
-    if (summary(dynamic_cast<const CLASS *>(node), s)) \
-    {                                                  \
-      add_comment();                                   \
-      return true;                                     \
-    }                                                  \
-  }
-#define CIRCLE_VNODE CIRCLE_NODE
-#include <luci/IR/CircleNodes.lst>
-#undef CIRCLE_VNODE
-#undef CIRCLE_NODE
-
-  return false;
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAbs *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAdd *node, locop::NodeSummary &s) const
-{
-  return use_xy_act(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAddN *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleArgMax *node,
-                                         locop::NodeSummary &s) const
-{
-  return use_ido(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleArgMin *node,
-                                         locop::NodeSummary &s) const
-{
-  return use_ido(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleAveragePool2D *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBatchMatMul *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBatchToSpaceND *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleBidirectionalSequenceLSTM *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCast *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCeil *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConcatenation *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConst *, locop::NodeSummary &s) const
-{
-  s.state(locop::NodeSummary::State::PartiallyKnown);
-  return true;
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleConv2D *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCos *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::ABC>::summary(const luci::CircleCustom *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDepthToSpace *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDepthwiseConv2D *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDequantize *node,
-                                         locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleDiv *node, locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleElu *node, locop::NodeSummary &s) const
-{
-  return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleEqual *node, locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleExp *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleExpandDims *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFakeQuant *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFill *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloor *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloorDiv *node,
-                                         locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFloorMod *node,
-                                         locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::DEF>::summary(const luci::CircleFullyConnected *node,
-                                         locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGather *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGatherNd *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGreater *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleGreaterEqual *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::GHIJ>::summary(const luci::CircleIf *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleL2Normalize *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleL2Pool2D *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLess *node, locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLessEqual *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLeakyRelu *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLocalResponseNormalization *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLog *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalAnd *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalNot *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogicalOr *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogistic *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleLogSoftmax *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMatrixDiag *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMatrixSetDiag *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMaximum *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMaxPool2D *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMean *node, locop::NodeSummary &s) const
-{
-  return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMinimum *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMirrorPad *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleMul *node, locop::NodeSummary &s) const
-{
-  return use_xy_act(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNeg *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNonMaxSuppressionV4 *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNonMaxSuppressionV5 *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::KLMN>::summary(const luci::CircleNotEqual *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleOneHot *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePack *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePad *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePadV2 *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePow *node, locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CirclePRelu *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleQuantize *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRange *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRank *node, locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceAny *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceMax *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceMin *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReduceProd *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRelu *node, locop::NodeSummary &s) const
-{
-  return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRelu6 *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReluN1To1 *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_features(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReshape *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleResizeBilinear *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleResizeNearestNeighbor *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReverseSequence *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleReverseV2 *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRound *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::OPQR>::summary(const luci::CircleRsqrt *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleScatterNd *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSegmentSum *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSelect *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSelectV2 *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleShape *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSin *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSlice *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSoftmax *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSpaceToBatchND *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSpaceToDepth *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSparseToDense *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSplit *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSplitV *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSqrt *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSquare *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSquaredDifference *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSqueeze *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleStridedSlice *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSub *node, locop::NodeSummary &s) const
-{
-  return use_xy(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleSum *node, locop::NodeSummary &s) const
-{
-  return use_reducer(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTanh *node, locop::NodeSummary &s) const
-{
-  return use_x(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTile *node, locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTopKV2 *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTranspose *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleTransposeConv *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnidirectionalSequenceLSTM *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnique *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::STUV>::summary(const luci::CircleUnpack *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleWhere *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleWhile *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::WXYZ>::summary(const luci::CircleZerosLike *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleBCQFullyConnected *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleBCQGather *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::CIRC>::summary(const luci::CircleInstanceNorm *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleInput *, locop::NodeSummary &s) const
-{
-  s.state(locop::NodeSummary::State::Complete);
-  return true;
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutput *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleCustomOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleIfOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleNonMaxSuppressionV4Out *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleNonMaxSuppressionV5Out *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutputDummy *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleOutputExclude *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleSplitOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleSplitVOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return use_input(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleTopKV2Out *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleUniqueOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleUnpackOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-bool SummaryBuilderLet<SB::VIRT>::summary(const luci::CircleWhileOut *node,
-                                          locop::NodeSummary &s) const
-{
-  return summary_node(tbl(), node, s);
-}
-
-} // namespace
-
 namespace luci
 {
 
@@ -2208,22 +36,10 @@ bool NodeSummaryBuilder::build(const loco::Node *node, locop::NodeSummary &s) co
     return true;
   }
 
-#define BUILD_GRP(GRP)                                   \
-  do                                                     \
-  {                                                      \
-    if (SummaryBuilderLet<SB::GRP>(_tbl).build(node, s)) \
-      return true;                                       \
-  } while (false)
-
-  BUILD_GRP(ABC);
-  BUILD_GRP(DEF);
-  BUILD_GRP(GHIJ);
-  BUILD_GRP(KLMN);
-  BUILD_GRP(OPQR);
-  BUILD_GRP(STUV);
-  BUILD_GRP(WXYZ);
-  BUILD_GRP(CIRC);
-  BUILD_GRP(VIRT);
+  if (CircleNodeSummaryBuilder().build(node, _tbl, s))
+  {
+    return true;
+  }
 
   return false;
 }
index ec8e0b0d617d6d6409d5d840dd1c1bc716854183..f28207df29786e2b4c7d1a31e659404cfda8189c 100644 (file)
@@ -13,7 +13,7 @@ target_link_libraries(luci_partition PUBLIC luci_lang)
 target_link_libraries(luci_partition PRIVATE luci_service)
 target_link_libraries(luci_partition PRIVATE luci_log)
 target_link_libraries(luci_partition PRIVATE luci_logex)
-target_link_libraries(luci_partition PRIVATE mio_circle)
+target_link_libraries(luci_partition PRIVATE mio_circle04)
 target_link_libraries(luci_partition PRIVATE nncc_common)
 target_link_libraries(luci_partition PRIVATE pepper_csv2vec)
 target_link_libraries(luci_partition PRIVATE oops)
index ebbff7a6a060cbc5f50d72889c9e0e8c33ab625a..e60567c69a2fd031ff70bf822caed1a6258fa185 100644 (file)
@@ -161,6 +161,7 @@ public:
   void visit(const luci::CircleSquaredDifference *) final;
   void visit(const luci::CircleSqueeze *) final;
   void visit(const luci::CircleStridedSlice *) final;
+  void visit(const luci::CircleSVDF *) final;
   void visit(const luci::CircleSub *) final;
   void visit(const luci::CircleSum *) final;
   void visit(const luci::CircleTanh *) final;
@@ -197,6 +198,7 @@ public:
   void visit(const luci::CircleTopKV2Out *) final;
   void visit(const luci::CircleUniqueOut *) final;
   void visit(const luci::CircleUnpackOut *) final;
+  void visit(const luci::CircleVariable *) final;
   void visit(const luci::CircleWhileOut *) final;
 
 public:
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.cpp
new file mode 100644 (file)
index 0000000..f661a79
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace
+{
+
+void connect(luci::ConnectNode *cn, const luci::CircleSVDF *node)
+{
+  auto *cloned = loco::must_cast<luci::CircleSVDF *>(cn->find_clone(node));
+
+  luci::CircleNode *input = loco::must_cast<luci::CircleNode *>(node->input());
+  luci::CircleNode *weight_feature = loco::must_cast<luci::CircleNode *>(node->weight_feature());
+  luci::CircleNode *weight_time = loco::must_cast<luci::CircleNode *>(node->weight_time());
+  luci::CircleNode *bias = loco::must_cast<luci::CircleNode *>(node->bias());
+  luci::CircleNode *input_activation_state =
+    loco::must_cast<luci::CircleNode *>(node->input_activation_state());
+
+  cloned->input(cn->find_clone(input));
+  cloned->weight_feature(cn->find_clone(weight_feature));
+  cloned->weight_time(cn->find_clone(weight_time));
+  cloned->bias(cn->find_clone(bias));
+  cloned->input_activation_state(cn->find_clone(input_activation_state));
+}
+
+} // namespace
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleSVDF *node) { connect(this, node); }
+
+} // namespace luci
diff --git a/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/partition/src/Nodes/CircleSVDF.test.cpp
new file mode 100644 (file)
index 0000000..5fae520
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+#include "ConnectNode.test.h"
+
+#include <luci/Service/CircleNodeClone.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class NodeGraphlet : public NodeGraphletT<luci::CircleSVDF>
+{
+public:
+  NodeGraphlet() = default;
+
+public:
+  void init(loco::Graph *g)
+  {
+    NodeGraphletT<luci::CircleSVDF>::init(g);
+
+    _node->fusedActivationFunction(luci::FusedActFunc::RELU);
+  }
+};
+
+class TestNodeGraph : public TestIsOGraph<5>, public NodeGraphlet
+{
+public:
+  TestNodeGraph() = default;
+
+public:
+  void init(const ShapeU32 shape)
+  {
+    TestIsOGraph<5>::init({shape, shape, shape, shape, shape}, shape);
+    NodeGraphlet::init(g());
+
+    node()->input(input(0));
+    node()->weight_feature(input(1));
+    node()->weight_time(input(2));
+    node()->bias(input(3));
+    node()->input_activation_state(input(4));
+
+    output()->from(node());
+  }
+};
+
+} // namespace
+
+TEST(ConnectNodeTest, connect_SVDF)
+{
+  TestNodeGraph tng;
+  tng.init({2, 3});
+
+  ConnectionTestHelper cth;
+  cth.prepare_inputs(&tng);
+
+  auto *node = tng.node();
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node));
+
+  auto *clone = luci::clone_node(node, cth.graph_clone());
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone));
+
+  cth.clone_connect(node, clone);
+
+  ASSERT_EQ(5, clone->arity());
+  ASSERT_EQ(cth.inputs(0), clone->arg(0));
+  ASSERT_EQ(cth.inputs(1), clone->arg(1));
+  ASSERT_EQ(cth.inputs(2), clone->arg(2));
+  ASSERT_EQ(cth.inputs(3), clone->arg(3));
+  ASSERT_EQ(cth.inputs(4), clone->arg(4));
+}
+
+TEST(ConnectNodeTest, connect_SVDF_NEG)
+{
+  TestNodeGraph tng;
+  tng.init({2, 3});
+
+  ConnectionTestHelper cth;
+  cth.prepare_inputs_miss(&tng);
+
+  auto *node = tng.node();
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(node));
+
+  auto *clone = luci::clone_node(node, cth.graph_clone());
+  ASSERT_NO_THROW(loco::must_cast<luci::CircleSVDF *>(clone));
+
+  EXPECT_ANY_THROW(cth.clone_connect(node, clone));
+}
diff --git a/compiler/luci/partition/src/Nodes/CircleVariable.cpp b/compiler/luci/partition/src/Nodes/CircleVariable.cpp
new file mode 100644 (file)
index 0000000..f7f6f21
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConnectNode.h"
+
+namespace luci
+{
+
+void ConnectNode::visit(const luci::CircleVariable *)
+{
+  // Nothing to do
+}
+
+} // namespace luci
index 4f2c268004dbf5e51d542b4a2c5a78892f7b51eb..0fabfc416e10e02e83c5caa0dd5476e41dcce2ad 100644 (file)
@@ -32,18 +32,18 @@ void dump(std::ostream &os, const PNode *pnode)
 void dump(std::ostream &os, const PGroup *pgroup)
 {
   os << "--- PGroup: " << pgroup->group << std::endl;
-  os << "Input(s): ";
+  os << "Input(s): ";
   for (auto &node_in : pgroup->inputs)
     os << node_in->name() << " ";
-  os << std::endl;
+  os << "]" << std::endl;
   for (auto &pnode : pgroup->pnodes)
   {
     dump(os, pnode.get());
   }
-  os << "Output(s): ";
+  os << "Output(s): ";
   for (auto &node_out : pgroup->outputs)
     os << node_out->name() << " ";
-  os << std::endl;
+  os << "]" << std::endl;
 }
 
 void dump(std::ostream &os, const PGroups *pgroups)
@@ -57,7 +57,8 @@ void dump(std::ostream &os, const PGroups *pgroups)
   {
     auto node = it->first;
     auto group = it->second;
-    os << "  Node: " << node << "(" << node->name() << "): " << group << std::endl;
+    os << "  Node: " << node << "(" << luci::opcode_name(node) << "," << node->name()
+       << "): " << group << std::endl;
   }
 }
 
index c517bf93fefaf859ca41e9772cbaed696f9eae19..4c3971bd8fea63b592f3efb0f28a62762fe66055 100644 (file)
@@ -58,9 +58,6 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
     //         we need to clone this CircleConst for each graph of the group.
     if (dynamic_cast<const luci::CircleConst *>(input) != nullptr)
       continue;
-    // Skip also for OutputExclude
-    if (dynamic_cast<const luci::CircleOutputExclude *>(input) != nullptr)
-      continue;
 
     auto input_group = pgroups->group_of(input);
     // NOTE: all the nodes should be registered and return should be valid group.
@@ -87,7 +84,7 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
         input_pgroup = pgroup_input;
       else
       {
-        if (input_pgroup != pgroup_input)
+        if (input_pgroup->group != pgroup_input->group)
           return false;
       }
     }
@@ -95,6 +92,48 @@ bool is_input_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
   return true;
 }
 
+/**
+ * @brief return true if there is only one output and is fed to same group of nodes
+ * @note  pgroups is used to find group of pgroup
+ *        ex)
+ *                     /-- pgroup_user_1 (grp_1)
+ *           --- pgroup
+ *                     \-- pgroup_user_2 (grp_2)
+ *
+ *           return false if grp_1 != grp_2
+ */
+bool is_output_same(const luci::PGroup *pgroup, const luci::PGroups *pgroups)
+{
+  assert(pgroups != nullptr);
+  assert(pgroup != nullptr);
+
+  std::string group;
+  for (auto &output : pgroup->outputs)
+  {
+    // get output_group
+    auto output_group = pgroups->group_of(output);
+    assert(not output_group.empty());
+    if (output_group.empty())
+      output_group = pgroups->default_group;
+
+    // find all PGroup that uses output
+    for (auto &pgroup_user : pgroups->pgroups)
+    {
+      for (auto &user_inputs : pgroup_user->inputs)
+      {
+        if (output == user_inputs)
+        {
+          // OK, these are connected, check group is same
+          if (pgroup_user->group != output_group)
+            return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
 /**
  * @brief merge pgroup into pgroup_i
  * @note  output of pgroup_i should be input of pgroup
@@ -191,6 +230,9 @@ std::unique_ptr<luci::PGroups> merge_pgroups(const luci::PGroups *s_pgroups)
         // skip if there are multiple inputs but inputs differ in group
         if (!is_input_same(pgroup.get(), d_pgroups.get()))
           continue;
+        // skip if pgroup has different group for other users of pgroup_i
+        if (!is_output_same(pgroup_i.get(), d_pgroups.get()))
+          continue;
         // TODO add more condition may be needed
 
         merge_into(pgroup.get(), pgroup_i.get());
index 0080873e6c56a38d19538576bd2e33c0c45be30f..eaeacf9c4bd32a4eca6e6a04d8b9a4e073535295 100644 (file)
@@ -46,6 +46,9 @@ public:
   bool visit(const luci::CircleUniqueOut *) final { return true; }
   bool visit(const luci::CircleUnpackOut *) final { return true; }
   bool visit(const luci::CircleWhileOut *) final { return true; }
+  // For inputs not used
+  bool visit(const luci::CircleOutputExclude *) final { return true; }
+  bool visit(const luci::CircleVariable *) final { return true; }
   // TODO add all virtual nodes
 
   // default is false
@@ -69,59 +72,80 @@ bool check_allocate_partition(const luci::CircleNode *node)
   return true;
 }
 
-class FindGroupToFollow final : public luci::CircleNodeVisitor<const std::string &>
+} // namespace
+
+namespace
 {
-public:
-  FindGroupToFollow(const luci::PartitionTable &partition, luci::PGroups *pgroups)
-    : _partition(partition), _pgroups(pgroups)
-  {
-    // NOTHING TODO
-  }
 
-private:
-  const std::string &groupof(const luci::CircleNode *input) const
+std::string group_from_partition(const luci::CircleNode *node,
+                                 const luci::PartitionTable &partition)
+{
+  LOGGER(l);
+
+  auto group = partition.default_group;
+
+  std::string opcodename; // opcodename or opname
+
+  switch (partition.comply)
   {
-    auto group = _pgroups->node2group[input];
-    assert(not group.empty());
-    if (group.empty())
-      return _partition.default_group;
-    return _pgroups->node2group[input];
+    case luci::PartitionTable::COMPLY::OPCODE:
+    {
+      opcodename = luci::opcode_name(node);
+      assert(!opcodename.empty());
+
+      auto it = partition.byopcodes.find(opcodename);
+      if (it != partition.byopcodes.end())
+        group = it->second;
+      break;
+    }
+    case luci::PartitionTable::COMPLY::OPNAME:
+    {
+      opcodename = node->name();
+      assert(!opcodename.empty());
+
+      auto it = partition.byopnames.find(opcodename);
+      if (it != partition.byopnames.end())
+        group = it->second;
+      break;
+    }
+
+    default:
+      throw std::runtime_error("Unsupported partition.comply");
   }
 
+  INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
+          << std::endl;
+
+  return group;
+}
+
+class IsVirtualInputNode final : public luci::CircleNodeVisitor<bool>
+{
 public:
-#define IMPLEMENT(CLASS)                                             \
-  const std::string &visit(const luci::CLASS *node) final            \
-  {                                                                  \
-    auto input = loco::must_cast<luci::CircleNode *>(node->input()); \
-    return groupof(input);                                           \
-  }
+  // TODO check CircleOutputDummy
+  bool visit(const luci::CircleOutputExclude *) final { return true; }
+  bool visit(const luci::CircleVariable *) final { return true; }
 
-  IMPLEMENT(CircleCustomOut);
-  IMPLEMENT(CircleIfOut);
-  IMPLEMENT(CircleNonMaxSuppressionV4Out);
-  IMPLEMENT(CircleNonMaxSuppressionV5Out);
-  IMPLEMENT(CircleSplitOut);
-  IMPLEMENT(CircleSplitVOut);
-  IMPLEMENT(CircleTopKV2Out);
-  IMPLEMENT(CircleUniqueOut);
-  IMPLEMENT(CircleUnpackOut);
-  IMPLEMENT(CircleWhileOut);
-
-#undef IMPLEMENT
-
-  // return empty for nothing to do
-  const std::string &visit(const luci::CircleNode *) final { return _empty_str; }
-
-private:
-  const luci::PartitionTable &_partition;
-  luci::PGroups *_pgroups = nullptr;
-  std::string _empty_str;
+  // default is false
+  bool visit(const luci::CircleNode *) final { return false; }
 };
 
-} // namespace
-
-namespace
+class IsMultiOutputNode final : public luci::CircleNodeVisitor<bool>
 {
+public:
+  bool visit(const luci::CircleCustom *) final { return true; }
+  bool visit(const luci::CircleIf *) final { return true; }
+  bool visit(const luci::CircleNonMaxSuppressionV4 *) final { return true; }
+  bool visit(const luci::CircleNonMaxSuppressionV5 *) final { return true; }
+  bool visit(const luci::CircleSplit *) final { return true; }
+  bool visit(const luci::CircleSplitV *) final { return true; }
+  bool visit(const luci::CircleTopKV2 *) final { return true; }
+  bool visit(const luci::CircleUnique *) final { return true; }
+  bool visit(const luci::CircleUnpack *) final { return true; }
+  bool visit(const luci::CircleWhile *) final { return true; }
+  // default is false
+  bool visit(const luci::CircleNode *) final { return false; }
+};
 
 void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &group, uint32_t idx)
 {
@@ -136,17 +160,56 @@ void append(luci::CircleNode *node, luci::PGroups *pgroups, const std::string &g
 
   pgroup->pnodes.push_back(std::move(pnode));
 
+  IsVirtualInputNode queryvi;
   // Set input of PGroup
   for (uint32_t in = 0; in < node->arity(); ++in)
   {
     auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
-    // this input maybe CircleInput in source graph
-    // --> not confident this is safe
-    pgroup->inputs.push_back(input);
+    if (input->accept(&queryvi))
+    {
+      auto pnode = std::make_unique<luci::PNode>();
+      pnode->node = input;
+      pnode->group = group;
+      pnode->pgroup = pgroup.get();
+
+      pgroup->pnodes.push_back(std::move(pnode));
+
+      pgroups->node2group[input] = group;
+    }
+    else
+    {
+      // this input maybe CircleInput in source graph
+      // --> not confident this is safe
+      pgroup->inputs.push_back(input);
+    }
+  }
+
+  IsMultiOutputNode query;
+  if (node->accept(&query))
+  {
+    // Include CircleXXXOut virtual nodes in this group
+    auto succs = loco::succs(node);
+    for (auto &succ_node : succs)
+    {
+      auto nodeout = loco::must_cast<luci::CircleNode *>(succ_node);
+
+      auto pnode = std::make_unique<luci::PNode>();
+      pnode->node = nodeout;
+      pnode->group = group;
+      pnode->pgroup = pgroup.get();
+
+      pgroup->pnodes.push_back(std::move(pnode));
+
+      pgroups->node2group[nodeout] = group;
+
+      pgroup->outputs.push_back(nodeout);
+    }
+  }
+  else
+  {
+    // Set output of PGroup: node itself
+    pgroup->outputs.push_back(node);
   }
-  // Set output of PGroup: node itself or multiple virtual outputs
-  // TODO support multiple virtual outputs
-  pgroup->outputs.push_back(node);
 
   pgroups->node2group[node] = group;
   pgroups->id2pgroup[pgroup->id] = pgroup.get();
@@ -182,70 +245,9 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
     // check if node is normal node that we are interested
     if (check_allocate_partition(node))
     {
-      auto group = partition.default_group;
-
-      std::string opcodename; // opcodename or opname
-
-      switch (partition.comply)
-      {
-        case luci::PartitionTable::COMPLY::OPCODE:
-        {
-          opcodename = luci::opcode_name(node);
-          assert(!opcodename.empty());
-
-          auto it = partition.byopcodes.find(opcodename);
-          if (it != partition.byopcodes.end())
-            group = it->second;
-          break;
-        }
-        case luci::PartitionTable::COMPLY::OPNAME:
-        {
-          opcodename = node->name();
-          assert(!opcodename.empty());
-
-          auto it = partition.byopnames.find(opcodename);
-          if (it != partition.byopnames.end())
-            group = it->second;
-          break;
-        }
-
-        default:
-          throw std::runtime_error("Unsupported partition.comply");
-      }
-
-      INFO(l) << "Op: " << node->name() << ": " << opcodename << ", " << node << ", " << group
-              << std::endl;
+      auto group = group_from_partition(node, partition);
 
       append(node, pgroups.get(), group, idx);
-#if 0
-      auto pgroup = std::make_unique<luci::PGroup>();
-      pgroup->group = group;
-      pgroup->id = idx + 1;
-
-      auto pnode = std::make_unique<luci::PNode>();
-      pnode->node = node;
-      pnode->group = group;
-      pnode->pgroup = pgroup.get();
-
-      pgroup->pnodes.push_back(std::move(pnode));
-
-      // Set input of PGroup
-      for (uint32_t in = 0; in < node->arity(); ++in)
-      {
-        auto input = loco::must_cast<luci::CircleNode *>(node->arg(in));
-        // this input maybe CircleInput in source graph
-        // --> not confident this is safe
-        pgroup->inputs.push_back(input);
-      }
-      // Set output of PGroup: node itself or multiple virtual outputs
-      // TODO support multiple virtual outputs
-      pgroup->outputs.push_back(node);
-
-      pgroups->node2group[node] = group;
-      pgroups->id2pgroup[pgroup->id] = pgroup.get();
-
-      pgroups->pgroups.push_back(std::move(pgroup));
-#endif
     }
     else
     {
@@ -255,22 +257,6 @@ std::unique_ptr<luci::PGroups> produce_pgroups(const luci::Module *source,
     }
   }
 
-  // handle for virtual nodes like multiple outputs
-  // these nodes should follow group of the input
-  for (uint32_t idx = 0; idx < nodes->size(); ++idx)
-  {
-    auto node = loco::must_cast<luci::CircleNode *>(nodes->at(idx));
-
-    // for virtual nodes like CircleUnpackOut should follow it's input (owner)
-    // or just set to default
-    FindGroupToFollow query(partition, pgroups.get());
-    const auto &group = node->accept(&query);
-    if (not group.empty())
-    {
-      append(node, pgroups.get(), group, idx);
-    }
-  }
-
   return std::move(pgroups);
 }
 
index b8b406a38468962873f253cb321364e0795de885..5237c6d3f5bb600571c7d0bc4052aba81bbfa211 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 if(NOT FlatBuffers_FOUND)
   message(STATUS "FlatBuffers NOT FOUND")
   return()
@@ -23,11 +23,11 @@ target_link_libraries(luci_pass PRIVATE luci_log)
 target_link_libraries(luci_pass PRIVATE luci_service)
 target_link_libraries(luci_pass PRIVATE luci_logex)
 target_link_libraries(luci_pass PRIVATE luci_profile)
-target_link_libraries(luci_pass PRIVATE mio_tflite260_inc)
+target_link_libraries(luci_pass PRIVATE mio_tflite280_inc)
 target_link_libraries(luci_pass PRIVATE nncc_common)
 target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
 target_link_libraries(luci_pass PRIVATE oops)
-target_link_libraries(luci_pass PRIVATE flatbuffers-1.12)
+target_link_libraries(luci_pass PRIVATE flatbuffers-2.0)
 install(TARGETS luci_pass DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
@@ -43,5 +43,5 @@ target_include_directories(luci_pass_test PRIVATE src)
 target_link_libraries(luci_pass_test luci_pass)
 target_link_libraries(luci_pass_test luci_lang)
 target_link_libraries(luci_pass_test luci_testhelper)
-target_link_libraries(luci_pass_test flatbuffers-1.12)
+target_link_libraries(luci_pass_test flatbuffers-2.0)
 #target_link_libraries(luci_pass_test oops)
index 658563ecf0bf23ee7c0d09e1d3745c8ab0d541dc..c803898f6614254f308fef9523b59738611d17a5 100644 (file)
@@ -47,15 +47,12 @@ public:
       ResolveCustomOpBatchMatMul,
       ResolveCustomOpMatMul,
       ResolveCustomOpMaxPoolWithArgmax,
-      QuantizeDequantizeWeights,
-      QuantizeWithMinMax,
-      Requantize,
       FoldAddV2,
       FoldCast,
       FoldDepthwiseConv2D,
       FoldDequantize,
+      FoldGather,
       FoldSparseToDense,
-      ForceQuantParam,
       ForwardReshapeToUnaryOp,
       SparsifyTensorPass,
       FusePreActivationBatchNorm,
@@ -79,6 +76,7 @@ public:
       TransformMinReluToRelu6Pass,
       SubstituteStridedSliceToReshape,
       SubstituteTransposeToReshape,
+      RemoveRedundantQuantize,
       RemoveRedundantReshape,
       RemoveFakeQuant,
       RemoveQuantDequantSeq,
@@ -86,16 +84,6 @@ public:
 
     enum AlgorithmParameters
     {
-      // quantize
-      Quantize_input_model_dtype,
-      Quantize_output_model_dtype,
-      Quantize_granularity, // layer-wise or channel-wise
-      Quantize_tensor_names,
-      Quantize_scales,
-      Quantize_zero_points,
-      Quantize_input_type,
-      Quantize_output_type,
-
       // sparsify
       Sparsify_tensor_name,
       Sparsify_traversal_order,
@@ -114,8 +102,6 @@ public:
     virtual bool query(Algorithm) = 0;
     virtual void param(AlgorithmParameters, const std::string &) = 0;
     virtual const std::string param(AlgorithmParameters) const = 0;
-    virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
-    virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
   };
 
 public:
@@ -127,8 +113,6 @@ public:
 
   void optimize(loco::Graph *) const;
 
-  void quantize(loco::Graph *) const;
-
   void sparsify(loco::Graph *) const;
 
 private:
diff --git a/compiler/luci/pass/include/luci/CircleQuantizer.h b/compiler/luci/pass/include/luci/CircleQuantizer.h
new file mode 100644 (file)
index 0000000..4e7074d
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_QUANTIZER_H__
+#define __LUCI_CIRCLE_QUANTIZER_H__
+
+#include <loco.h>
+
+#include <string>
+#include <vector>
+
+namespace luci
+{
+
+class CircleQuantizer final
+{
+public:
+  struct Options
+  {
+    struct LayerParam
+    {
+      std::string name;
+      std::string dtype;
+      std::string granularity;
+    };
+
+    enum Algorithm
+    {
+      QuantizeDequantizeWeights,
+      QuantizeWithMinMax,
+      Requantize,
+      CopyQuantParam,
+      ForceQuantParam,
+      ConvertToFakeQuantizedModel,
+    };
+
+    enum AlgorithmParameters
+    {
+      // quantize
+      Quantize_input_model_dtype,
+      Quantize_output_model_dtype,
+      Quantize_granularity, // layer-wise or channel-wise
+      Quantize_tensor_names,
+      Quantize_scales,
+      Quantize_zero_points,
+      Quantize_layer_params,
+
+      // copy_quantparam
+      Quantize_src_tensor_names,
+      Quantize_dst_tensor_names,
+
+      Quantize_input_type,
+      Quantize_output_type,
+      Quantize_TF_style_maxpool,
+    };
+
+    virtual ~Options() = default;
+
+    virtual void enable(Algorithm) = 0;
+    virtual bool query(Algorithm) = 0;
+    virtual void param(AlgorithmParameters, const std::string &) = 0;
+    virtual const std::string param(AlgorithmParameters) const = 0;
+    virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
+    virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
+
+    // Quantization parameters for multiple layers
+    virtual void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) = 0;
+    virtual std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const = 0;
+  };
+
+public:
+  // TODO maybe caller can provide Options as ctor parameters
+  Options *options(void);
+
+public:
+  void quantize(loco::Graph *) const;
+
+private:
+  std::unique_ptr<Options> _options;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_QUANTIZER_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h b/compiler/luci/pass/include/luci/Pass/ConvertToFakeQuantizedModelPass.h
new file mode 100644 (file)
index 0000000..91dd230
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
+#define __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to convert a quantized model to a fake-quantized fp32 model.
+ */
+struct ConvertToFakeQuantizedModelPass final : public logo::Pass
+{
+  ConvertToFakeQuantizedModelPass() {}
+
+  const char *name(void) const final { return "luci::ConvertToFakeQuantizedModelPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_CONVERT_TO_FAKE_QUANTIZED_MODEL_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/CopyQuantParamPass.h
new file mode 100644 (file)
index 0000000..18c9cd5
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_COPY_QUANT_PARAM_PASS_H__
+#define __LUCI_COPY_QUANT_PARAM_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to copy quantparam (scale, zerop) of a tensor to another tensor
+ */
+class CopyQuantParamPass : public logo::Pass
+{
+public:
+  using TensorVector = std::vector<std::string>;
+
+public:
+  CopyQuantParamPass(TensorVector &src_tensors, TensorVector &dst_tensors)
+    : _src_tensors{src_tensors}, _dst_tensors{dst_tensors}
+  {
+    // DO NOTHING
+  }
+  virtual const char *name(void) const { return "luci::CopyQuantParamPass"; }
+
+public:
+  bool run(loco::Graph *graph);
+
+private:
+  TensorVector _src_tensors;
+  TensorVector _dst_tensors;
+};
+
+} // namespace luci
+
+#endif //__LUCI_COPY_QUANT_PARAM_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h b/compiler/luci/pass/include/luci/Pass/FoldGatherPass.h
new file mode 100644 (file)
index 0000000..de08c88
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_GATHER_PASS_H__
+#define __LUCI_FOLD_GATHER_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fold Gather to a constant tensor
+ *
+ */
+struct FoldGatherPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FoldGatherPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_GATHER_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamBackwardPass.h
new file mode 100644 (file)
index 0000000..0c489fc
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
+#define __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to propagate quantization parameters of an operator's output to input
+ */
+struct PropagateQParamBackwardPass final : public logo::Pass
+{
+  PropagateQParamBackwardPass(loco::DataType output) : _output_model_dtype(output) {}
+
+  const char *name(void) const final { return "luci::PropagateQParamBackwardPass"; }
+
+  bool run(loco::Graph *g) final;
+
+private:
+  loco::DataType _output_model_dtype;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROPAGATE_QPARAM_BACKWARD_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQParamForwardPass.h
new file mode 100644 (file)
index 0000000..952bd96
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
+#define __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to propagate quantization parameters of an operator's input to output
+ */
+struct PropagateQParamForwardPass final : public logo::Pass
+{
+  PropagateQParamForwardPass(bool TF_style_maxpool) : _TF_style_maxpool(TF_style_maxpool) {}
+
+  PropagateQParamForwardPass() {}
+
+  const char *name(void) const final { return "luci::PropagateQParamForwardPass"; }
+
+  bool run(loco::Graph *g) final;
+
+private:
+  bool _TF_style_maxpool = false;
+};
+
+} // namespace luci
+
+#endif // __LUCI_PROPAGATE_QPARAM_FORWARD_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/PropagateQuantParamPass.h
deleted file mode 100644 (file)
index 7e0c44b..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
-#define __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
-
-#include <logo/Pass.h>
-
-namespace luci
-{
-
-/**
- * @brief  Class to propagate quantization parameters of an operator's output to input
- */
-struct PropagateQuantParamPass final : public logo::Pass
-{
-  const char *name(void) const final { return "luci::PropagateQuantParamPass"; }
-
-  bool run(loco::Graph *g) final;
-};
-
-} // namespace luci
-
-#endif // __LUCI_PROPAGATE_QUANT_PARAM_PASS_H__
index 5c9cd427f8d96f535a9ef2b74127d984fec10fb7..30c8db058e446534deeafde8ca01292eecdda792 100644 (file)
 #ifndef __LUCI_QUANTIZATION_PARAMETERS_H__
 #define __LUCI_QUANTIZATION_PARAMETERS_H__
 
+#include <loco.h>
+
+#include <string>
+
 namespace luci
 {
 
@@ -26,6 +30,13 @@ enum QuantizationGranularity
   ChannelWise = 1,
 };
 
+struct LayerInfo
+{
+  std::string name;
+  loco::DataType dtype;
+  QuantizationGranularity granularity;
+};
+
 } // namespace luci
 
 #endif // __LUCI_QUANTIZATION_PARAMETERS_H__
index 68765ec5b6d4ff498e5d06c05f53817e909294c5..1825ee1aa605a33fed4724efc79f6183c8e9d07f 100644 (file)
@@ -31,13 +31,31 @@ namespace luci
  */
 class QuantizeDequantizeWeightsPass : public logo::Pass
 {
+public:
+  struct Context
+  {
+    loco::DataType input_model_dtype = loco::DataType::Unknown;
+    loco::DataType output_model_dtype = loco::DataType::Unknown;
+    QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+    std::vector<LayerInfo> layers_info;
+  };
+
+public:
+  QuantizeDequantizeWeightsPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
+  {
+    // DO NOTHING
+  }
+
 public:
   QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
                                 QuantizationGranularity granularity)
-    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
-                                                                                        granularity}
   {
-    // DO NOTHING
+    _ctx = std::make_unique<Context>();
+    {
+      _ctx->input_model_dtype = input_model_dtype;
+      _ctx->output_model_dtype = output_model_dtype;
+      _ctx->granularity = granularity;
+    }
   }
   virtual const char *name(void) const { return "luci::QuantizeDequantizeWeightsPass"; }
 
@@ -45,9 +63,7 @@ public:
   bool run(loco::Graph *graph);
 
 private:
-  loco::DataType _input_model_dtype;
-  loco::DataType _output_model_dtype;
-  QuantizationGranularity _granularity;
+  std::unique_ptr<Context> _ctx;
 };
 
 } // namespace luci
diff --git a/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h b/compiler/luci/pass/include/luci/Pass/QuantizePreCheckerPass.h
new file mode 100644 (file)
index 0000000..c852f88
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
+#define __LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to verify the input model has the form acceptable by quantizer
+ */
+class QuantizePreCheckerPass : public logo::Pass
+{
+public:
+  const char *name(void) const final { return "luci::QuantizePreCheckerPass"; }
+
+public:
+  bool run(loco::Graph *graph) final;
+};
+
+} // namespace luci
+
+#endif //__LUCI_QUANTIZE_PRE_CHECKER_PASS_H__
index 648abad7077f1f6009e4454057a863f0d2cb0390..ea6db85d1beb7101975a6704c1ab2cb0ea35f27a 100644 (file)
@@ -23,6 +23,8 @@
 
 #include <luci/Pass/QuantizationParameters.h>
 
+#include <vector>
+
 namespace luci
 {
 
@@ -31,26 +33,41 @@ namespace luci
  */
 class QuantizeWithMinMaxPass : public logo::Pass
 {
+public:
+  struct Context
+  {
+    loco::DataType input_model_dtype = loco::DataType::Unknown;
+    loco::DataType output_model_dtype = loco::DataType::Unknown;
+    QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+    loco::DataType input_type = loco::DataType::Unknown;
+    loco::DataType output_type = loco::DataType::Unknown;
+    bool TF_style_maxpool = false;
+    std::vector<LayerInfo> layers_info;
+  };
+
   // For backward-compatibility
   // TODO Remove this constructor
 public:
   QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
                          QuantizationGranularity granularity)
-    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
-      _granularity{granularity}, _input_type{output_model_dtype}, _output_type{output_model_dtype}
   {
-    // DO NOTHING
+    _ctx = std::make_unique<Context>();
+    {
+      _ctx->input_model_dtype = input_model_dtype;
+      _ctx->output_model_dtype = output_model_dtype;
+      _ctx->granularity = granularity;
+      _ctx->input_type = output_model_dtype;
+      _ctx->output_type = output_model_dtype;
+      _ctx->TF_style_maxpool = false;
+    }
   }
 
 public:
-  QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
-                         QuantizationGranularity granularity, loco::DataType input_type,
-                         loco::DataType output_type)
-    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype},
-      _granularity{granularity}, _input_type{input_type}, _output_type{output_type}
+  QuantizeWithMinMaxPass(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
   {
     // DO NOTHING
   }
+
   virtual const char *name(void) const { return "luci::QuantizeWithMinMaxPass"; }
 
 public:
@@ -61,11 +78,7 @@ private:
   void set_output_type(loco::Graph *graph) const;
 
 private:
-  loco::DataType _input_model_dtype;
-  loco::DataType _output_model_dtype;
-  QuantizationGranularity _granularity;
-  loco::DataType _input_type;
-  loco::DataType _output_type;
+  std::unique_ptr<Context> _ctx;
 };
 
 } // namespace luci
diff --git a/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h b/compiler/luci/pass/include/luci/Pass/RemoveRedundantQuantizePass.h
new file mode 100644 (file)
index 0000000..3e76bcd
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
+#define __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to remove redundant quantize operations
+ */
+struct RemoveRedundantQuantizePass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::RemoveRedundantQuantizePass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_REMOVE_REDUNDANT_QUANTIZE_PASS_H__
index c1a06bfdac007547a62da7718f91553841ce4d92..e3f126b156d79d44385639d65dcb3c7caf6e2bda 100644 (file)
@@ -44,10 +44,26 @@ bool is_batchnorm_add(const luci::CircleAdd *add, luci::CircleMul *&mul, luci::C
     return false;
   }
 
-  if (constant->rank() != 1)
+  uint32_t channel_dim = 0;
+
+  if (constant->rank() == 1)
+  {
+    channel_dim = constant->dim(0).value();
+  }
+  else if (constant->rank() == 4)
+  {
+    for (uint32_t i = 0; i < 3; i++)
+    {
+      if (constant->dim(i).value() != 1)
+        return false;
+    }
+    channel_dim = constant->dim(3).value();
+  }
+  else
+  {
     return false;
+  }
 
-  auto channel_dim = constant->dim(0);
   // Assumption: Layout is channel-last
   if (!(channel_dim == add->dim(add->rank() - 1)))
     return false;
@@ -90,10 +106,26 @@ bool is_batchnorm_mul(const luci::CircleMul *mul, luci::CircleNode *&pred_node,
     return false;
   }
 
-  if (constant->rank() != 1)
+  uint32_t channel_dim = 0;
+
+  if (constant->rank() == 1)
+  {
+    channel_dim = constant->dim(0).value();
+  }
+  else if (constant->rank() == 4)
+  {
+    for (uint32_t i = 0; i < 3; i++)
+    {
+      if (constant->dim(i).value() != 1)
+        return false;
+    }
+    channel_dim = constant->dim(3).value();
+  }
+  else
+  {
     return false;
+  }
 
-  auto channel_dim = constant->dim(0);
   // Assumption: Layout is channel-last
   if (!(channel_dim == mul->dim(mul->rank() - 1)))
     return false;
index 08e7fac1ccab1945e56d8198ec2fcd6b54d12679..cc8c5615f60d9df08f9213bd936de192da9116db 100644 (file)
@@ -50,7 +50,7 @@ public:
     auto channel_size = *last_it;
 
     _add->shape(shape);
-    _add_beta->shape({channel_size});
+    set_beta_shape(channel_size);
     _add_beta->size<loco::DataType::FLOAT32>(channel_size);
     for (uint32_t i = 0; i < channel_size; i++)
       _add_beta->at<loco::DataType::FLOAT32>(i) = i;
@@ -62,11 +62,24 @@ public:
 public:
   luci::CircleAdd *add() { return _add; }
 
+protected:
+  virtual void set_beta_shape(uint32_t channel) = 0;
+
 protected:
   luci::CircleAdd *_add = nullptr;
   luci::CircleConst *_add_beta = nullptr;
 };
 
+class AddRank1BetaGraphlet : public AddBetaGraphlet
+{
+  void set_beta_shape(uint32_t channel) final { _add_beta->shape({channel}); }
+};
+
+class AddRank4BetaGraphlet : public AddBetaGraphlet
+{
+  void set_beta_shape(uint32_t channel) final { _add_beta->shape({1, 1, 1, channel}); }
+};
+
 /**
  * @brief Graphlet with Mul and Const as gamma from BatchNorm
  */
@@ -90,7 +103,7 @@ public:
     auto channel_size = *last_it;
 
     _mul->shape(shape);
-    _mul_gamma->shape({channel_size});
+    set_gamma_shape(channel_size);
     _mul_gamma->size<loco::DataType::FLOAT32>(channel_size);
     for (uint32_t i = 0; i < channel_size; i++)
       _mul_gamma->at<loco::DataType::FLOAT32>(i) = i;
@@ -102,15 +115,28 @@ public:
 public:
   luci::CircleMul *mul(void) { return _mul; }
 
+protected:
+  virtual void set_gamma_shape(uint32_t channel) = 0;
+
 protected:
   luci::CircleMul *_mul = nullptr;
   luci::CircleConst *_mul_gamma = nullptr;
 };
 
+class MulRank1GammaGraphlet : public MulGammaGraphlet
+{
+  void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({channel}); }
+};
+
+class MulRank4GammaGraphlet : public MulGammaGraphlet
+{
+  void set_gamma_shape(uint32_t channel) final { _mul_gamma->shape({1, 1, 1, channel}); }
+};
+
 /**
  * @brief Graph of Mul-Add pattern from BatchNorm
  */
-class MulAddGraph : public TestIOGraph, public AddBetaGraphlet, public MulGammaGraphlet
+class MulAddGraph : public TestIOGraph, public AddRank1BetaGraphlet, public MulRank1GammaGraphlet
 {
 public:
   MulAddGraph() = default;
@@ -118,8 +144,30 @@ public:
   void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
   {
     TestIOGraph::init(shape_in, shape_out);
-    MulGammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
-    AddBetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+    MulRank1GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+    AddRank1BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
+
+    // connect network
+    _mul->x(input());
+    _mul->y(_mul_gamma);
+    _add->x(_mul);
+    _add->y(_add_beta);
+    output()->from(_add);
+  }
+};
+
+class MulAddRank4Graph : public TestIOGraph,
+                         public AddRank4BetaGraphlet,
+                         public MulRank4GammaGraphlet
+{
+public:
+  MulAddRank4Graph() = default;
+
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    MulRank4GammaGraphlet::init(g(), shape_in, luci::FusedActFunc::NONE);
+    AddRank4BetaGraphlet::init(g(), shape_out, luci::FusedActFunc::RELU);
 
     // connect network
     _mul->x(input());
@@ -133,7 +181,7 @@ public:
 /**
  * @brief Graph of Add with Const
  */
-class AddGraph : public TestIOGraph, public AddBetaGraphlet
+class AddGraph : public TestIOGraph, public AddRank1BetaGraphlet
 {
 public:
   AddGraph() = default;
@@ -141,7 +189,24 @@ public:
   void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
   {
     TestIOGraph::init(shape_in, shape_out);
-    AddBetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+    AddRank1BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
+
+    // connect network
+    _add->x(input());
+    _add->y(_add_beta);
+    output()->from(_add);
+  }
+};
+
+class AddRank4Graph : public TestIOGraph, public AddRank4BetaGraphlet
+{
+public:
+  AddRank4Graph() = default;
+
+  void init(const ShapeU32 shape_in, const ShapeU32 shape_out)
+  {
+    TestIOGraph::init(shape_in, shape_out);
+    AddRank4BetaGraphlet::init(g(), shape_in, luci::FusedActFunc::RELU);
 
     // connect network
     _add->x(input());
@@ -160,6 +225,7 @@ public:
 
 protected:
   luci::test::MulAddGraph _mag;
+  luci::test::MulAddRank4Graph _mag_r4;
 };
 
 class BatchNormPatternFinderAddTest : public ::testing::Test
@@ -169,6 +235,7 @@ public:
 
 protected:
   luci::test::AddGraph _ag;
+  luci::test::AddRank4Graph _ag_r4;
 };
 
 TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add)
@@ -192,6 +259,19 @@ TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add2)
   ASSERT_TRUE(res);
 }
 
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_add_rank4)
+{
+  _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+  luci::CircleMul *mul = nullptr;
+  luci::CircleConst *beta = nullptr;
+
+  auto res = luci::is_batchnorm_add(_mag_r4.add(), mul, beta);
+  ASSERT_TRUE(res);
+  ASSERT_NE(nullptr, mul);
+  ASSERT_NE(nullptr, beta);
+}
+
 TEST_F(BatchNormPatternFinderAddTest, is_batchnorm_add_NEG)
 {
   _ag.init({1, 16, 16, 4}, {1, 16, 16, 4});
@@ -215,3 +295,16 @@ TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul)
   ASSERT_NE(nullptr, pred);
   ASSERT_NE(nullptr, gamma);
 }
+
+TEST_F(BatchNormPatternFinderMulAddTest, is_batchnorm_mul_rank4)
+{
+  _mag_r4.init({1, 16, 16, 4}, {1, 16, 16, 4});
+
+  luci::CircleNode *pred = nullptr;
+  luci::CircleConst *gamma = nullptr;
+
+  auto res = luci::is_batchnorm_mul(_mag_r4.mul(), pred, gamma);
+  ASSERT_TRUE(res);
+  ASSERT_NE(nullptr, pred);
+  ASSERT_NE(nullptr, gamma);
+}
index 75f04b3b5b9fe8d7445652d455f6ea20261cd2c4..6dbb22d7c02934423d5f463867f4d0ceca696d6e 100644 (file)
@@ -22,9 +22,9 @@
 #include "luci/Pass/FoldCastPass.h"
 #include "luci/Pass/FoldDepthwiseConv2DPass.h"
 #include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/FoldGatherPass.h"
 #include "luci/Pass/FoldSparseToDensePass.h"
 #include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
-#include "luci/Pass/ForceQuantParamPass.h"
 #include "luci/Pass/FuseActivationFunctionPass.h"
 #include "luci/Pass/FuseAddWithFullyConnectedPass.h"
 #include "luci/Pass/FuseAddWithTConvPass.h"
 #include "luci/Pass/FusePreActivationBatchNormPass.h"
 #include "luci/Pass/FuseTransposeWithMeanPass.h"
 #include "luci/Pass/MakeBatchNormGammaPositivePass.h"
-#include "luci/Pass/PropagateQuantParamPass.h"
 #include "luci/Pass/RemoveFakeQuantPass.h"
 #include "luci/Pass/RemoveQuantDequantSeqPass.h"
 #include "luci/Pass/RemoveRedundantReshapePass.h"
 #include "luci/Pass/RemoveRedundantTransposePass.h"
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
 #include "luci/Pass/RemoveUnnecessaryReshapePass.h"
 #include "luci/Pass/RemoveUnnecessarySlicePass.h"
 #include "luci/Pass/RemoveUnnecessaryStridedSlicePass.h"
@@ -52,9 +52,6 @@
 #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
 #include "luci/Pass/ResolveCustomOpMatMulPass.h"
 #include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
-#include "luci/Pass/RequantizePass.h"
-#include "luci/Pass/QuantizeWithMinMaxPass.h"
-#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
 #include "luci/Pass/SparsifyTensorPass.h"
 #include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
 #include "luci/Pass/SubstitutePackToReshapePass.h"
@@ -75,9 +72,6 @@
 
 #include "ModulePhase.h"
 #include "ProgressReporter.h"
-#include "helpers/Strings.h"
-
-#include "QuantizedModelVerifier.h"
 
 #include <luci/IR/CircleNodes.h>
 #include <logo/Phase.h>
@@ -91,37 +85,17 @@ namespace
 
 using namespace luci;
 
-template <typename T> T lexical_cast(const std::string &str)
-{
-  std::istringstream ss;
-  ss.str(str);
-  T data;
-  ss >> data;
-  return data;
-}
-
-template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
-{
-  std::vector<T> result;
-  std::transform(sv.begin(), sv.end(), std::back_inserter(result),
-                 [](std::string str) -> T { return lexical_cast<T>(str); });
-  return result;
-}
-
 class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
 {
 public:
   void enable(Algorithm) final;
   void param(AlgorithmParameters, const std::string &) final;
   const std::string param(AlgorithmParameters) const final;
-  void params(AlgorithmParameters, std::vector<std::string> &) final;
-  std::vector<std::string> params(AlgorithmParameters) const final;
   bool query(Algorithm) final;
 
 private:
   std::vector<Algorithm> _algorithms;
   std::map<AlgorithmParameters, const std::string> _algorithm_params;
-  std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
 };
 
 void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
@@ -144,24 +118,6 @@ const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const
   }
 }
 
-void OptimizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
-{
-  _multiple_params[param] = vec;
-}
-
-std::vector<std::string> OptimizeOptionsImpl::params(AlgorithmParameters param) const
-{
-  auto param_vec = _multiple_params.find(param);
-  if (param_vec != _multiple_params.end())
-  {
-    return param_vec->second;
-  }
-  else
-  {
-    return std::vector<std::string>();
-  }
-}
-
 bool OptimizeOptionsImpl::query(Algorithm algo)
 {
   std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
@@ -312,6 +268,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
   }
+  if (_options->query(Options::Algorithm::FoldGather))
+  {
+    phase.emplace_back(std::make_unique<luci::FoldGatherPass>());
+  }
   if (_options->query(Options::Algorithm::FoldSparseToDense))
   {
     phase.emplace_back(std::make_unique<luci::FoldSparseToDensePass>());
@@ -368,6 +328,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::RemoveRedundantTransposePass>());
   }
+  if (_options->query(Options::Algorithm::RemoveRedundantQuantize))
+  {
+    phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+  }
   if (_options->query(Options::Algorithm::ReplaceMulAddWithDepthwiseConv))
   {
     phase.emplace_back(std::make_unique<luci::ReplaceMulAddWithDepthwiseConvPass>());
@@ -417,174 +381,6 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   phase_runner.run(phase);
 }
 
-void CircleOptimizer::quantize(loco::Graph *g) const
-{
-  // Fake quantization of weights
-  if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
-  {
-    static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
-    static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
-    static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
-
-    auto input_model_dtype =
-      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
-    auto output_model_dtype =
-      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
-    auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
-
-    if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
-      throw std::runtime_error("Unsupported input type. List of supported input type: " +
-                               to_string(fakeq_supported_input_model_dtype));
-
-    if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
-      throw std::runtime_error("Unsupported output type. List of supported output type: " +
-                               to_string(fakeq_supported_output_model_dtype));
-
-    if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
-      throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
-                               to_string(fakeq_supported_granularity));
-
-    if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
-        str_to_dtype(output_model_dtype) != loco::DataType::U8)
-      throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
-
-    // Clear existing quantparams before doing fake quantization
-    for (auto node : loco::active_nodes(loco::output_nodes(g)))
-    {
-      auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-      if (circle_node->quantparam() != nullptr)
-        circle_node->quantparam(nullptr);
-    }
-
-    luci::QuantizeDequantizeWeightsPass fake_quantizer(str_to_dtype(input_model_dtype),
-                                                       str_to_dtype(output_model_dtype),
-                                                       str_to_granularity(granularity));
-    fake_quantizer.run(g);
-  }
-
-  // Actual quantization of weights, bias, and activation
-  if (_options->query(Options::Algorithm::QuantizeWithMinMax))
-  {
-    static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
-    static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
-    static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
-    static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
-    static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
-
-    auto input_model_dtype =
-      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
-    auto output_model_dtype =
-      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
-    auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
-    auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
-    if (input_type.empty())
-      input_type = output_model_dtype;
-    auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
-    if (output_type.empty())
-      output_type = output_model_dtype;
-
-    if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
-      throw std::runtime_error("Unsupported input type. List of supported input types: " +
-                               to_string(qwmm_supported_input_model_dtype));
-
-    if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
-      throw std::runtime_error("Unsupported output type. List of supported output types: " +
-                               to_string(qwmm_supported_output_model_dtype));
-
-    if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
-      throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
-                               to_string(qwmm_supported_granularity));
-
-    if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
-      throw std::runtime_error("Unsupported input type. List of supported input types: " +
-                               to_string(qwmm_supported_input_type));
-
-    if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
-      throw std::runtime_error("Unsupported output type. List of supported output types: " +
-                               to_string(qwmm_supported_output_type));
-
-    if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
-        str_to_dtype(output_model_dtype) != loco::DataType::U8)
-      throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
-
-    luci::QuantizeWithMinMaxPass quantizer(
-      str_to_dtype(input_model_dtype), str_to_dtype(output_model_dtype),
-      str_to_granularity(granularity), str_to_dtype(input_type), str_to_dtype(output_type));
-    quantizer.run(g);
-
-    // Post-quantization optimizations
-    logo::Phase phase;
-
-    phase.emplace_back(std::make_unique<luci::PropagateQuantParamPass>());
-
-    phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
-    phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
-    phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
-
-    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
-    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
-    phase_runner.attach(&prog);
-    phase_runner.run(phase);
-
-    // Verify the type/granularity of the quantized model
-    luci::QuantizedModelVerifier verifier(str_to_dtype(output_model_dtype),
-                                          str_to_granularity(granularity));
-    verifier.verify(g);
-  }
-
-  // Requantize
-  if (_options->query(Options::Algorithm::Requantize))
-  {
-    static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
-    static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
-
-    auto input_model_dtype =
-      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
-    auto output_model_dtype =
-      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
-
-    if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
-      throw std::runtime_error("Unsupported input type. List of supported input types: " +
-                               to_string(rq_supported_input_model_dtype));
-
-    if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
-      throw std::runtime_error("Unsupported output type. List of supported output types: " +
-                               to_string(rq_supported_output_model_dtype));
-
-    luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
-                                     str_to_dtype(output_model_dtype));
-    requantizer.run(g);
-  }
-
-  // Force to write quantparam to specified tensors
-  // NOTE Only per-tensor (not per-channel) qparam can be written
-  if (_options->query(Options::Algorithm::ForceQuantParam))
-  {
-    ForceQuantParamPass::TensorVector tensors =
-      _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
-    auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
-    auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
-
-    // Cast scales/zero_points to proper types
-    ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
-    ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
-
-    ForceQuantParamPass fq(tensors, scales, zero_points);
-    fq.run(g);
-  }
-
-  logo::Phase phase;
-
-  // Do Shape/Type inference
-  phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
-  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
-
-  ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
-  logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
-  phase_runner.attach(&prog);
-  phase_runner.run(phase);
-}
-
 void CircleOptimizer::sparsify(loco::Graph *g) const
 {
   if (_options->query(Options::Algorithm::SparsifyTensorPass))
index a1b5c7f8045bb294fabe27dec917505f3fc22e51..041fc7d7584938f1f4886df76f8aec317264a16b 100644 (file)
@@ -71,171 +71,3 @@ TEST(CircleOptimizerTest, sparsify_simple)
 
   SUCCEED();
 }
-
-TEST(CircleOptimizerTest, quantize_quantdequant_simple)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-  options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
-  o.quantize(&g);
-
-  SUCCEED();
-}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-  options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-  options->param(AlgorithmParameters::Quantize_granularity, "invalid");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_simple)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-  options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
-  o.quantize(&g);
-
-  SUCCEED();
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_input_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-  options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_output_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_granularity, "layer");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_minmax_gran_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-  options->param(AlgorithmParameters::Quantize_granularity, "invalid");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_requant_simple)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::Requantize);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-
-  o.quantize(&g);
-
-  SUCCEED();
-}
-
-TEST(CircleOptimizerTest, quantize_requant_input_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::Requantize);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
-
-TEST(CircleOptimizerTest, quantize_requant_output_NEG)
-{
-  loco::Graph g;
-  luci::CircleOptimizer o;
-
-  auto options = o.options();
-
-  options->enable(Algorithms::Requantize);
-  options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
-  options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
-
-  EXPECT_THROW(o.quantize(&g), std::runtime_error);
-}
diff --git a/compiler/luci/pass/src/CircleQuantizer.cpp b/compiler/luci/pass/src/CircleQuantizer.cpp
new file mode 100644 (file)
index 0000000..ce38a90
--- /dev/null
@@ -0,0 +1,458 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleQuantizer.h"
+
+#include "luci/Pass/CopyQuantParamPass.h"
+#include "luci/Pass/ForceQuantParamPass.h"
+#include "luci/Pass/PropagateQParamForwardPass.h"
+#include "luci/Pass/RequantizePass.h"
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include "luci/Pass/FoldDequantizePass.h"
+#include "luci/Pass/QuantizePreCheckerPass.h"
+#include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizeDequantizeWeightsPass.h"
+
+#include "luci/Pass/CircleShapeInferencePass.h"
+#include "luci/Pass/CircleTypeInferencePass.h"
+
+// logo passes
+#include <logo/RemoveDeadNodeWithQueryPass.h>
+
+#include "ProgressReporter.h"
+#include "helpers/Strings.h"
+
+#include "QuantizedModelVerifier.h"
+
+#include <luci/IR/CircleNode.h>
+#include <logo/Phase.h>
+
+#include <memory>
+
+namespace
+{
+
+using namespace luci;
+using LayerParam = luci::CircleQuantizer::Options::LayerParam;
+
+template <typename T> T lexical_cast(const std::string &str)
+{
+  std::istringstream ss;
+  ss.str(str);
+  T data;
+  ss >> data;
+  return data;
+}
+
+template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
+{
+  std::vector<T> result;
+  std::transform(sv.begin(), sv.end(), std::back_inserter(result),
+                 [](std::string str) -> T { return lexical_cast<T>(str); });
+  return result;
+}
+
+class QuantizeOptionsImpl final : public luci::CircleQuantizer::Options
+{
+public:
+  void enable(Algorithm) final;
+  void param(AlgorithmParameters, const std::string &) final;
+  const std::string param(AlgorithmParameters) const final;
+  void params(AlgorithmParameters, std::vector<std::string> &) final;
+  std::vector<std::string> params(AlgorithmParameters) const final;
+  void layer_params(AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>> &) final;
+  std::vector<std::shared_ptr<LayerParam>> layer_params(AlgorithmParameters) const final;
+  bool query(Algorithm) final;
+
+private:
+  std::vector<Algorithm> _algorithms;
+  std::map<AlgorithmParameters, const std::string> _algorithm_params;
+  std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
+  std::map<AlgorithmParameters, std::vector<std::shared_ptr<LayerParam>>> _layer_params;
+};
+
+void QuantizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
+
+void QuantizeOptionsImpl::param(AlgorithmParameters param, const std::string &str)
+{
+  _algorithm_params.insert(std::pair<AlgorithmParameters, const std::string>(param, str));
+}
+
+const std::string QuantizeOptionsImpl::param(AlgorithmParameters param) const
+{
+  auto param_str = _algorithm_params.find(param);
+  if (param_str != _algorithm_params.end())
+  {
+    return param_str->second;
+  }
+  else
+  {
+    return std::string();
+  }
+}
+
+void QuantizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
+{
+  _multiple_params[param] = vec;
+}
+
+std::vector<std::string> QuantizeOptionsImpl::params(AlgorithmParameters param) const
+{
+  auto param_vec = _multiple_params.find(param);
+  if (param_vec != _multiple_params.end())
+  {
+    return param_vec->second;
+  }
+  else
+  {
+    return std::vector<std::string>();
+  }
+}
+
+void QuantizeOptionsImpl::layer_params(AlgorithmParameters param,
+                                       std::vector<std::shared_ptr<LayerParam>> &vec)
+{
+  _layer_params[param] = vec;
+}
+
+std::vector<std::shared_ptr<LayerParam>>
+QuantizeOptionsImpl::layer_params(AlgorithmParameters param) const
+{
+  auto param_vec = _layer_params.find(param);
+  if (param_vec != _layer_params.end())
+  {
+    return param_vec->second;
+  }
+  else
+  {
+    return std::vector<std::shared_ptr<LayerParam>>();
+  }
+}
+
+bool QuantizeOptionsImpl::query(Algorithm algo)
+{
+  std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
+  if (it == _algorithms.end())
+    return false;
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+CircleQuantizer::Options *CircleQuantizer::options(void)
+{
+  if (_options == nullptr)
+  {
+    _options = std::make_unique<QuantizeOptionsImpl>();
+  }
+
+  return _options.get();
+}
+
+// Quantize the graph according to the algorithms/parameters enabled in
+// _options. Passes run in a fixed order: QuantizeDequantizeWeights,
+// QuantizeWithMinMax (with input pre-check and post-verification),
+// Requantize, ForceQuantParam, CopyQuantParam,
+// ConvertToFakeQuantizedModel, then shape/type inference on the whole graph.
+// NOTE(review): _options is dereferenced without a null check, so quantize()
+// appears to assume options() was called beforehand -- confirm with callers.
+void CircleQuantizer::quantize(loco::Graph *g) const
+{
+  // Fake quantization of weights
+  if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
+  {
+    static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
+    static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
+    static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
+
+    auto input_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+    auto output_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+    auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+    auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
+
+    if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
+      throw std::runtime_error("Unsupported input type. List of supported input type: " +
+                               to_string(fakeq_supported_input_model_dtype));
+
+    if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
+      throw std::runtime_error("Unsupported output type. List of supported output type: " +
+                               to_string(fakeq_supported_output_model_dtype));
+
+    if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
+      throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+                               to_string(fakeq_supported_granularity));
+
+    if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
+        str_to_dtype(output_model_dtype) != loco::DataType::U8)
+      throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
+
+    // Check dtype/granularity of layer params
+    for (auto layer_param : layer_params)
+    {
+      auto name = layer_param->name;
+      if (!in_array(to_lower_case(layer_param->dtype), fakeq_supported_output_model_dtype))
+      {
+        throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
+                                 to_string(fakeq_supported_output_model_dtype));
+      }
+      if (!in_array(to_lower_case(layer_param->granularity), fakeq_supported_granularity))
+      {
+        throw std::runtime_error(
+          "Unsupported granularity in " + name +
+          ". List of supported granularity: " + to_string(fakeq_supported_granularity));
+      }
+    }
+
+    // Clear existing quantparams before doing fake quantization
+    for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    {
+      auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+      if (circle_node->quantparam() != nullptr)
+        circle_node->quantparam(nullptr);
+    }
+
+    auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
+    {
+      ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+      ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+      ctx->granularity = str_to_granularity(granularity);
+
+      // Convert string-typed layer params into typed LayerInfo entries
+      for (auto layer_param : layer_params)
+      {
+        LayerInfo info;
+        {
+          info.name = layer_param->name;
+          info.dtype = str_to_dtype(layer_param->dtype);
+          info.granularity = str_to_granularity(layer_param->granularity);
+        }
+        ctx->layers_info.emplace_back(info);
+      }
+    }
+
+    luci::QuantizeDequantizeWeightsPass fake_quantizer(std::move(ctx));
+
+    fake_quantizer.run(g);
+  }
+
+  // Actual quantization of weights, bias, and activation
+  if (_options->query(Options::Algorithm::QuantizeWithMinMax))
+  {
+    static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
+    static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
+    static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
+    static const std::vector<std::string> qwmm_supported_input_type{"uint8", "int16"};
+    static const std::vector<std::string> qwmm_supported_output_type{"uint8", "int16"};
+
+    auto input_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+    auto output_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+    auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
+    // Unspecified input/output types default to the output model dtype
+    auto input_type = _options->param(Options::AlgorithmParameters::Quantize_input_type);
+    if (input_type.empty())
+      input_type = output_model_dtype;
+    auto output_type = _options->param(Options::AlgorithmParameters::Quantize_output_type);
+    if (output_type.empty())
+      output_type = output_model_dtype;
+
+    // Enabled only when the parameter is exactly the string "True"
+    bool TF_style_maxpool =
+      _options->param(Options::AlgorithmParameters::Quantize_TF_style_maxpool) == "True";
+
+    auto layer_params = _options->layer_params(Options::AlgorithmParameters::Quantize_layer_params);
+
+    if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
+      throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                               to_string(qwmm_supported_input_model_dtype));
+
+    if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
+      throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                               to_string(qwmm_supported_output_model_dtype));
+
+    if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
+      throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
+                               to_string(qwmm_supported_granularity));
+
+    if (!in_array(to_lower_case(input_type), qwmm_supported_input_type))
+      throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                               to_string(qwmm_supported_input_type));
+
+    if (!in_array(to_lower_case(output_type), qwmm_supported_output_type))
+      throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                               to_string(qwmm_supported_output_type));
+
+    if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
+        str_to_dtype(output_model_dtype) != loco::DataType::U8)
+      throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
+
+    // Check dtype/granularity of layer params
+    for (auto layer_param : layer_params)
+    {
+      auto name = layer_param->name;
+      if (!in_array(to_lower_case(layer_param->dtype), qwmm_supported_output_model_dtype))
+      {
+        throw std::runtime_error("Unsupported dtype in " + name + ". List of supported dtype: " +
+                                 to_string(qwmm_supported_output_model_dtype));
+      }
+      if (!in_array(to_lower_case(layer_param->granularity), qwmm_supported_granularity))
+      {
+        throw std::runtime_error(
+          "Unsupported granularity in " + name +
+          ". List of supported granularity: " + to_string(qwmm_supported_granularity));
+      }
+    }
+
+    // Input model checker for quantization
+    luci::QuantizePreCheckerPass input_model_checker{};
+    input_model_checker.run(g);
+
+    auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+    {
+      ctx->input_model_dtype = str_to_dtype(input_model_dtype);
+      ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+      ctx->granularity = str_to_granularity(granularity);
+      ctx->input_type = str_to_dtype(input_type);
+      ctx->output_type = str_to_dtype(output_type);
+      ctx->TF_style_maxpool = TF_style_maxpool;
+
+      for (auto layer_param : layer_params)
+      {
+        LayerInfo info;
+        {
+          info.name = layer_param->name;
+          info.dtype = str_to_dtype(layer_param->dtype);
+          info.granularity = str_to_granularity(layer_param->granularity);
+        }
+        ctx->layers_info.emplace_back(info);
+      }
+    }
+
+    luci::QuantizeWithMinMaxPass quantizer(std::move(ctx));
+
+    quantizer.run(g);
+
+    auto verify_ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+    {
+      verify_ctx->output_model_dtype = str_to_dtype(output_model_dtype);
+      verify_ctx->granularity = str_to_granularity(granularity);
+      verify_ctx->input_type = str_to_dtype(input_type);
+      verify_ctx->output_type = str_to_dtype(output_type);
+      verify_ctx->TF_style_maxpool = TF_style_maxpool;
+
+      for (auto layer_param : layer_params)
+      {
+        LayerInfo info;
+        {
+          info.name = layer_param->name;
+          info.dtype = str_to_dtype(layer_param->dtype);
+          info.granularity = str_to_granularity(layer_param->granularity);
+        }
+        verify_ctx->layers_info.emplace_back(info);
+      }
+    }
+
+    // Verify the type/granularity of the quantized model
+    luci::QuantizedModelVerifier verifier(std::move(verify_ctx));
+
+    verifier.verify(g);
+  }
+
+  // Requantize
+  if (_options->query(Options::Algorithm::Requantize))
+  {
+    static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
+    static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
+
+    auto input_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+    auto output_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
+
+    if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
+      throw std::runtime_error("Unsupported input type. List of supported input types: " +
+                               to_string(rq_supported_input_model_dtype));
+
+    if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
+      throw std::runtime_error("Unsupported output type. List of supported output types: " +
+                               to_string(rq_supported_output_model_dtype));
+
+    luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
+                                     str_to_dtype(output_model_dtype));
+    requantizer.run(g);
+  }
+
+  // Force to write quantparam to specified tensors
+  // NOTE Only per-tensor (not per-channel) qparam can be written
+  if (_options->query(Options::Algorithm::ForceQuantParam))
+  {
+    ForceQuantParamPass::TensorVector tensors =
+      _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
+    auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
+    auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
+
+    // Cast scales/zero_points to proper types
+    ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
+    ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
+
+    ForceQuantParamPass fq(tensors, scales, zero_points);
+    fq.run(g);
+  }
+
+  // Copy quantparam of a tensor to another tensor
+  if (_options->query(Options::Algorithm::CopyQuantParam))
+  {
+    CopyQuantParamPass::TensorVector src_tensors =
+      _options->params(Options::AlgorithmParameters::Quantize_src_tensor_names);
+    CopyQuantParamPass::TensorVector dst_tensors =
+      _options->params(Options::AlgorithmParameters::Quantize_dst_tensor_names);
+
+    CopyQuantParamPass cq(src_tensors, dst_tensors);
+    cq.run(g);
+  }
+
+  // Convert quantized model to fake-quantized model
+  if (_options->query(Options::Algorithm::ConvertToFakeQuantizedModel))
+  {
+    luci::ConvertToFakeQuantizedModelPass fake_quantizer;
+    fake_quantizer.run(g);
+
+    logo::Phase phase;
+
+    // Default passes
+    phase.emplace_back(std::make_unique<logo::RemoveDeadNodeWithQueryPass>());
+    phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+    phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+    // Fold Dequantize Ops generated during fake quantization
+    phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
+
+    ProgressReporter prog(g, logo::PhaseStrategy::Restart);
+    logo::PhaseRunner<logo::PhaseStrategy::Restart> phase_runner{g};
+    phase_runner.attach(&prog);
+    phase_runner.run(phase);
+  }
+
+  logo::Phase phase;
+
+  // Do Shape/Type inference
+  phase.emplace_back(std::make_unique<luci::CircleShapeInferencePass>());
+  phase.emplace_back(std::make_unique<luci::CircleTypeInferencePass>());
+
+  ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+  logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+  phase_runner.attach(&prog);
+  phase_runner.run(phase);
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/CircleQuantizer.test.cpp b/compiler/luci/pass/src/CircleQuantizer.test.cpp
new file mode 100644 (file)
index 0000000..5766d5f
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/CircleQuantizer.h"
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+using Algorithms = luci::CircleQuantizer::Options::Algorithm;
+using AlgorithmParameters = luci::CircleQuantizer::Options::AlgorithmParameters;
+
+// QuantizeDequantizeWeights with all-valid parameters must not throw
+TEST(CircleQuantizerTest, quantize_quantdequant_simple)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeDequantizeWeights);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+  opts->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+  quantizer.quantize(&g);
+
+  SUCCEED();
+}
+
+// An invalid input model dtype must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_quantdequant_input_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeDequantizeWeights);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+  opts->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// An invalid output model dtype must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_quantdequant_output_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeDequantizeWeights);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+  opts->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// An invalid granularity must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_quantdequant_gran_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeDequantizeWeights);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+  opts->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// QuantizeWithMinMax with all-valid parameters must not throw
+TEST(CircleQuantizerTest, quantize_minmax_simple)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeWithMinMax);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+  opts->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+  quantizer.quantize(&g);
+
+  SUCCEED();
+}
+
+// An invalid input model dtype must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_minmax_input_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeWithMinMax);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+  opts->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// An invalid output model dtype must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_minmax_output_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeWithMinMax);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+  opts->param(AlgorithmParameters::Quantize_granularity, "layer");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// An invalid granularity must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_minmax_gran_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::QuantizeWithMinMax);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+  opts->param(AlgorithmParameters::Quantize_granularity, "invalid");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// Requantize with valid int8 -> uint8 parameters must not throw
+TEST(CircleQuantizerTest, quantize_requant_simple)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::Requantize);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+
+  quantizer.quantize(&g);
+
+  SUCCEED();
+}
+
+// An invalid input model dtype must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_requant_input_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::Requantize);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
+
+// An invalid output model dtype must be rejected with std::runtime_error
+TEST(CircleQuantizerTest, quantize_requant_output_NEG)
+{
+  loco::Graph g;
+  luci::CircleQuantizer quantizer;
+
+  auto opts = quantizer.options();
+  opts->enable(Algorithms::Requantize);
+  opts->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+  opts->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
+
+  EXPECT_THROW(quantizer.quantize(&g), std::runtime_error);
+}
index 2707140491dc3c2973161442aea439053a62649c..ce4f540350a06d4096196be7b528438fd3bf7575 100644 (file)
@@ -228,6 +228,9 @@ bool check_4d_reshape(loco::Node *node, const std::vector<int32_t> indices)
   if (input->shape_status() != luci::ShapeStatus::VALID)
     return false;
 
+  if (input->rank() != 4)
+    return false;
+
   if (reshape->shape_status() != luci::ShapeStatus::VALID)
     return false;
 
@@ -804,6 +807,8 @@ class ConvertNCHWToNHWC final : public luci::CircleNodeMutableVisitor<bool>
     return true;
   }
 
+  bool visit(luci::CircleElu *node) { return convert_unary_features<luci::CircleElu>(node); }
+
   bool visit(luci::CircleLeakyRelu *node)
   {
     return convert_unary_features<luci::CircleLeakyRelu>(node);
@@ -1240,6 +1245,7 @@ bool ConvertNCHWToNHWCPass::run(loco::Graph *g)
         break;
       case luci::CircleOpcode::ADD:
       case luci::CircleOpcode::CONCATENATION:
+      case luci::CircleOpcode::ELU:
       case luci::CircleOpcode::LEAKY_RELU:
       case luci::CircleOpcode::LOGISTIC:
       case luci::CircleOpcode::MAXIMUM:
index c9412fbb1e89c2063957eb0f0f93aae45329dd6a..dd81d13800fd1e39e7feafd42a3c4e75ab08f79f 100644 (file)
@@ -264,6 +264,22 @@ public:
   luci::CircleConst *input2 = nullptr;
 };
 
+// Test graph containing a single Elu Op (checks NCHW-to-NHWC conversion
+// of a unary feature Op)
+class EluGraph final : public SimpleGraph
+{
+protected:
+  // Insert a CircleElu node between the graph input and output
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    elu = g.nodes()->create<luci::CircleElu>();
+    elu->features(input);
+    elu->name("elu");
+
+    return elu;
+  }
+
+public:
+  luci::CircleElu *elu = nullptr;
+};
+
 class LeakyReluGraph final : public SimpleGraph
 {
 protected:
@@ -941,6 +957,26 @@ TEST(ConvertNCHWToNHWC, Concatenation)
   EXPECT_EQ(3, g.concat->axis());
 }
 
+// After conversion, Elu must be wrapped by pre/post Transposes and its
+// shape must be permuted from NCHW (1,16,4,4) to NHWC (1,4,4,16)
+TEST(ConvertNCHWToNHWC, Elu)
+{
+  EluGraph g;
+  g.init();
+
+  run_phase(&g.g, true, true);
+
+  check_pre_trans(g.elu->features());
+
+  auto elu_succs = loco::succs(g.elu);
+  EXPECT_EQ(1, elu_succs.size());
+  check_post_trans(*elu_succs.begin());
+
+  // Check elu shape
+  EXPECT_EQ(1, g.elu->dim(0).value());
+  EXPECT_EQ(4, g.elu->dim(1).value());
+  EXPECT_EQ(4, g.elu->dim(2).value());
+  EXPECT_EQ(16, g.elu->dim(3).value());
+}
+
 TEST(ConvertNCHWToNHWC, LeakyRelu)
 {
   LeakyReluGraph g;
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.cpp
new file mode 100644 (file)
index 0000000..11970ff
--- /dev/null
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include "luci/Pass/QuantizationParameters.h"
+
+#include "QuantizationUtils.h"
+
+#include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+namespace
+{
+
+// Build a CircleQuantize Op mirroring the dtype, shape, and quantization
+// parameter of the given node; the node's origin is propagated as well
+luci::CircleQuantize *create_quantize(luci::CircleNode *node)
+{
+  auto quantize = node->graph()->nodes()->create<luci::CircleQuantize>();
+  quantize->name(node->name() + "_Quantize");
+  quantize->dtype(node->dtype());
+
+  const auto rank = node->rank();
+  quantize->rank(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    quantize->dim(axis).set(node->dim(axis).value());
+  quantize->shape_status(luci::ShapeStatus::VALID);
+
+  copy_quantparam(node, quantize);
+  luci::add_origin(quantize, luci::get_origin(node));
+
+  return quantize;
+}
+
+// Build a fp32 CircleDequantize Op mirroring the shape of the given node;
+// the node's origin is propagated as well
+luci::CircleDequantize *create_dequantize(luci::CircleNode *node)
+{
+  auto dequantize = node->graph()->nodes()->create<luci::CircleDequantize>();
+  dequantize->name(node->name() + "_Dequantize");
+  dequantize->dtype(loco::DataType::FLOAT32);
+
+  const auto rank = node->rank();
+  dequantize->rank(rank);
+  for (uint32_t axis = 0; axis < rank; ++axis)
+    dequantize->dim(axis).set(node->dim(axis).value());
+  dequantize->shape_status(luci::ShapeStatus::VALID);
+
+  luci::add_origin(dequantize, luci::get_origin(node));
+
+  return dequantize;
+}
+
+// Return true if node is a quantized activation
+// 1. dtype is u8 or s16
+// 2. node has qparam
+bool is_quant_act(const luci::CircleNode *node)
+{
+  const auto dtype = node->dtype();
+  if (dtype != loco::DataType::U8 and dtype != loco::DataType::S16)
+    return false;
+
+  // Idiomatic: the qparam check is the result itself (was if/return chain)
+  return node->quantparam() != nullptr;
+}
+
+// Return true if node is a quantized const
+// 1. dtype is not fp32
+// 2. node has qparam
+// NOTE Quantized const can have the following types
+// u8 (weights, activation), s16 (weights, activation), s32 (bias), s64 (bias)
+bool is_quant_const(const luci::CircleConst *node)
+{
+  // Idiomatic: both conditions folded into one boolean result
+  return node->dtype() != loco::DataType::FLOAT32 and node->quantparam() != nullptr;
+}
+
+// Insert a Dequantize Op right after the given node: all successors of the
+// node are redirected to the new Dequantize, which takes the node as input
+void insert_dequantize(loco::Node *lnode)
+{
+  auto cnode = loco::must_cast<luci::CircleNode *>(lnode);
+  auto dq = create_dequantize(cnode);
+  loco::replace(cnode).with(dq);
+  dq->input(cnode);
+}
+
+// Insert a Quantize Op right after the given node and return it: all
+// successors of the node are redirected to the new Quantize Op
+luci::CircleQuantize *insert_quantize(loco::Node *lnode)
+{
+  auto cnode = loco::must_cast<luci::CircleNode *>(lnode);
+  auto q = create_quantize(cnode);
+  loco::replace(cnode).with(q);
+  q->input(cnode);
+  return q;
+}
+
+// Turn node into a fp32 tensor and drop its quantization parameter
+void dequantize(luci::CircleNode *node)
+{
+  node->quantparam(nullptr);
+  node->dtype(loco::DataType::FLOAT32);
+}
+
+// Fake-quantize a quantized activation:
+// 1. insert Quantize-Dequantize Ops after it
+// 2. turn the node itself back into fp32 (no-op for non-quantized nodes)
+void fq_activation(luci::CircleNode *node)
+{
+  if (is_quant_act(node))
+  {
+    insert_dequantize(insert_quantize(node));
+    dequantize(node);
+  }
+}
+
+#define RETURN_UNLESS(COND) \
+  if (not(COND))            \
+    return;
+
+// Visitor to do fake quantization for each Op
+// For non-const activation, insert Quantize-Dequantize after the ofm
+// For quantized const, insert Dequantize after the const
+struct FakeQuantize final : public luci::CircleNodeMutableVisitor<void>
+{
+  // Default case: any Op without an explicit overload below is unsupported
+  void visit(luci::CircleNode *node)
+  {
+    throw std::runtime_error("Unsupported op for fake quantization in " + node->name());
+  }
+
+  void visit(luci::CircleInput *node)
+  {
+    RETURN_UNLESS(is_quant_act(node));
+
+    auto quant = insert_quantize(node);
+    insert_dequantize(quant);
+
+    dequantize(node);
+
+    // Update graph input
+    const auto inputs = node->graph()->inputs();
+    auto graph_input = inputs->at(node->index());
+    graph_input->dtype(loco::DataType::FLOAT32);
+  }
+
+  void visit(luci::CircleOutput *node)
+  {
+    RETURN_UNLESS(is_quant_act(node));
+
+    dequantize(node);
+
+    // Update graph output
+    const auto outputs = node->graph()->outputs();
+    auto graph_output = outputs->at(node->index());
+    graph_output->dtype(loco::DataType::FLOAT32);
+  }
+
+  // For quantized const, insert Dequantize Op
+  void visit(luci::CircleConst *node)
+  {
+    RETURN_UNLESS(is_quant_const(node));
+
+    insert_dequantize(node);
+  }
+
+  // For non-const activation, insert Quantize-Dequantize Ops
+  // and dequantize the node
+  void visit(luci::CircleConv2D *node) { fq_activation(node); }
+  void visit(luci::CircleAdd *node) { fq_activation(node); }
+};
+
+#undef RETURN_UNLESS
+
+} // namespace
+
+namespace luci
+{
+
+// Apply the FakeQuantize visitor to every active node in the graph.
+// Throws (via the visitor's default case) if an unsupported Op is found.
+bool ConvertToFakeQuantizedModelPass::run(loco::Graph *g)
+{
+  LOGGER(l);
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    INFO(l) << "ConvertToFakeQuantizedModelPass visit node: " << circle_node->name() << std::endl;
+
+    FakeQuantize fq;
+    circle_node->accept(&fq);
+  }
+
+  // One time run
+  return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp b/compiler/luci/pass/src/ConvertToFakeQuantizedModelPass.test.cpp
new file mode 100644 (file)
index 0000000..560d68a
--- /dev/null
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <logo/Phase.h>
+
+#include "luci/Pass/ConvertToFakeQuantizedModelPass.h"
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Check the below pattern
+// Quantize (scale, zp) -> Dequantize (node)
+void check_q_dq(loco::Node *node, float scale, int64_t zp)
+{
+  auto dequant = dynamic_cast<luci::CircleDequantize *>(node);
+  // ASSERT (not EXPECT): EXPECT_TRUE does not abort, so a failed cast would
+  // be followed by a nullptr dereference and crash the whole test binary
+  ASSERT_TRUE(dequant != nullptr);
+  auto quant = dynamic_cast<luci::CircleQuantize *>(dequant->input());
+  ASSERT_TRUE(quant != nullptr);
+  auto qparam = quant->quantparam();
+  ASSERT_TRUE(qparam != nullptr);
+  EXPECT_EQ(scale, qparam->scale[0]);
+  EXPECT_EQ(zp, qparam->zerop[0]);
+}
+
+// Check that node is a Dequantize Op
+void check_dq(loco::Node *node)
+{
+  EXPECT_NE(nullptr, dynamic_cast<luci::CircleDequantize *>(node));
+}
+
+// Attach a per-tensor quantization parameter (scale, zero point) to node
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+  auto qparam = std::make_unique<luci::CircleQuantParam>();
+  qparam->scale.push_back(scale);
+  qparam->zerop.push_back(zp);
+  node->quantparam(std::move(qparam));
+}
+
+/**
+ *  SimpleGraph for testing
+ *  - Child class should implement insertGraphBody()
+ *
+ *  Example (U8ConvGraph inherits SimpleGraph and create Conv2D Op)
+ *
+ *  BEFORE
+ *  - A model is quantized (ex: u8)
+ *
+ *  [Input(u8)] [Filter(u8)] [Bias(s32)]
+ *           \       |        /
+ *            \      |       /
+ *             \     |      /
+ *              [Conv2D(u8)]
+ *                   |
+ *              [Output(u8)]
+ *
+ *  AFTER
+ *  - Ops are converted to fp32
+ *  - Quantize/Dequantize Ops are inserted properly
+ *    - Q-DQ is inserted after non-const activation
+ *    - DQ is inserted after const
+ *
+ *  [Input(u8)]
+ *        |
+ *  [Quant(u8)]     [Filter(u8)]       [Bias(s32)]
+ *        |              |                 |
+ *  [Dequant(fp32)] [Dequant(fp32)] [Dequant(fp32)]
+ *             \         |          /
+ *              \        |         /
+ *               \       |        /
+ *                 [Conv2D(fp32)]
+ *                       |
+ *                  [Quant(u8)]
+ *                       |
+ *                 [Dequant(fp32)]
+ *                       |
+ *                  [Output(fp32)]
+ */
+template <loco::DataType T> class SimpleGraph
+{
+public:
+  // Build input -> (graph body) -> output where every tensor has dtype T
+  // and shape (1, 4, 4, 4); input/output get per-tensor qparam (1.0, 0)
+  void init()
+  {
+    input = g.nodes()->create<luci::CircleInput>();
+    output = g.nodes()->create<luci::CircleOutput>();
+    input->name("input");
+    output->name("output");
+
+    auto graph_input = g.inputs()->create();
+    input->index(graph_input->index());
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    graph_input->dtype(T);
+    input->dtype(T);
+    output->dtype(T);
+    graph_output->dtype(T);
+
+    graph_input->shape({1, 4, 4, 4});
+    input->shape({1, 4, 4, 4});
+    output->shape({1, 4, 4, 4});
+    graph_output->shape({1, 4, 4, 4});
+
+    set_qparam(input, 1.0, 0);
+    set_qparam(output, 1.0, 0);
+
+    auto graph_body = insertGraphBody(input);
+    output->from(graph_body);
+  }
+
+  virtual ~SimpleGraph() = default;
+
+protected:
+  // Child classes insert the Op(s) under test and return the last node
+  virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+  loco::Graph g;
+  luci::CircleInput *input = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// u8-quantized Conv2D graph: u8 input/filter, s32 bias, u8 output;
+// conv/weights/bias all carry per-tensor qparam (2.0, 127)
+class U8ConvGraph final : public SimpleGraph<loco::DataType::U8>
+{
+protected:
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    conv = g.nodes()->create<luci::CircleConv2D>();
+    weights = g.nodes()->create<luci::CircleConst>();
+    bias = g.nodes()->create<luci::CircleConst>();
+
+    conv->dtype(loco::DataType::U8);
+    weights->dtype(loco::DataType::U8);
+    bias->dtype(loco::DataType::S32);
+
+    conv->shape({1, 4, 4, 4});
+    weights->shape({4, 1, 1, 4});
+    bias->shape({4});
+
+    // Fill consts with deterministic ramp values 0..N-1
+    weights->size<loco::DataType::U8>(16);
+    for (uint32_t i = 0; i < 16; i++)
+      weights->at<loco::DataType::U8>(i) = i;
+
+    bias->size<loco::DataType::S32>(4);
+    for (uint32_t i = 0; i < 4; i++)
+      bias->at<loco::DataType::S32>(i) = i;
+
+    set_qparam(conv, 2.0, 127);
+    set_qparam(weights, 2.0, 127);
+    set_qparam(bias, 2.0, 127);
+
+    conv->input(input);
+    conv->filter(weights);
+    conv->bias(bias);
+
+    conv->name("conv");
+    weights->name("weights");
+    bias->name("bias");
+
+    return conv;
+  }
+
+public:
+  luci::CircleConv2D *conv = nullptr;
+  luci::CircleConst *weights = nullptr;
+  luci::CircleConst *bias = nullptr;
+};
+
+// fp32 (non-quantized) Conv2D graph; no qparam is set on conv/weights/bias,
+// so the fake-quantization pass should leave it untouched
+class FP32ConvGraph final : public SimpleGraph<loco::DataType::FLOAT32>
+{
+protected:
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    conv = g.nodes()->create<luci::CircleConv2D>();
+    weights = g.nodes()->create<luci::CircleConst>();
+    bias = g.nodes()->create<luci::CircleConst>();
+
+    conv->dtype(loco::DataType::FLOAT32);
+    weights->dtype(loco::DataType::FLOAT32);
+    bias->dtype(loco::DataType::FLOAT32);
+
+    conv->shape({1, 4, 4, 4});
+    weights->shape({4, 1, 1, 4});
+    bias->shape({4});
+
+    // Fill consts with deterministic ramp values 0..N-1
+    weights->size<loco::DataType::FLOAT32>(16);
+    for (uint32_t i = 0; i < 16; i++)
+      weights->at<loco::DataType::FLOAT32>(i) = i;
+
+    bias->size<loco::DataType::FLOAT32>(4);
+    for (uint32_t i = 0; i < 4; i++)
+      bias->at<loco::DataType::FLOAT32>(i) = i;
+
+    conv->input(input);
+    conv->filter(weights);
+    conv->bias(bias);
+
+    conv->name("conv");
+    weights->name("weights");
+    bias->name("bias");
+
+    return conv;
+  }
+
+public:
+  luci::CircleConv2D *conv = nullptr;
+  luci::CircleConst *weights = nullptr;
+  luci::CircleConst *bias = nullptr;
+};
+
+} // namespace
+
+// The u8 Conv2D graph must get Quantize-Dequantize after each activation
+// (input/ofm) and a lone Dequantize after each quantized const
+TEST(ConvertToFakeQuantizedModelTest, U8Conv2D)
+{
+  U8ConvGraph g;
+  g.init();
+
+  luci::ConvertToFakeQuantizedModelPass fq;
+  fq.run(&g.g);
+
+  // Check ifm
+  check_q_dq(g.conv->input(), 1.0, 0);
+
+  // Check weights
+  check_dq(g.conv->filter());
+
+  // Check bias
+  check_dq(g.conv->bias());
+
+  // Check ofm
+  check_q_dq(g.output->from(), 2.0, 127);
+
+  SUCCEED();
+}
+
+TEST(ConvertToFakeQuantizedModelTest, F32Conv2D_NEG)
+{
+  FP32ConvGraph g;
+  g.init();
+
+  luci::ConvertToFakeQuantizedModelPass fq;
+  fq.run(&g.g);
+
+  uint32_t dequant_count = 0;
+  uint32_t quant_count = 0;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(&g.g)))
+  {
+    auto cnode = loco::must_cast<luci::CircleNode *>(node);
+    auto opcode = cnode->opcode();
+    if (opcode == luci::CircleOpcode::DEQUANTIZE)
+      dequant_count++;
+    if (opcode == luci::CircleOpcode::QUANTIZE)
+      quant_count++;
+  }
+
+  // Check no quant/dequant Op is inserted
+  EXPECT_EQ(0, quant_count);
+  EXPECT_EQ(0, dequant_count);
+}
diff --git a/compiler/luci/pass/src/CopyQuantParamPass.cpp b/compiler/luci/pass/src/CopyQuantParamPass.cpp
new file mode 100644 (file)
index 0000000..9b1bb0e
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/CopyQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+struct SrcDst
+{
+  CircleNode *src = nullptr;
+  CircleNode *dst = nullptr;
+};
+
+} // namespace
+
+bool CopyQuantParamPass::run(loco::Graph *g)
+{
+  LOGGER(l);
+
+  INFO(l) << "CopyQuantParamPass Start" << std::endl;
+
+  if (_src_tensors.size() != _dst_tensors.size())
+    throw std::runtime_error("The numbers of Source/Destination tensors do not match.");
+
+  // Return src/dst CircleNodes
+  auto get_src_dst = [&g](std::string src, std::string dst) {
+    SrcDst src_dst;
+    for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    {
+      auto const cnode = loco::must_cast<CircleNode *>(node);
+      auto const name = cnode->name();
+      if (name == src)
+        src_dst.src = cnode;
+
+      if (name == dst)
+        src_dst.dst = cnode;
+    }
+    return src_dst;
+  };
+
+  for (uint32_t i = 0; i < _src_tensors.size(); i++)
+  {
+    auto src = _src_tensors[i];
+    auto dst = _dst_tensors[i];
+
+    auto nodes = get_src_dst(src, dst);
+    if (not nodes.src)
+      throw std::runtime_error("The tensor named " + src + " does not exist.");
+
+    if (not nodes.dst)
+      throw std::runtime_error("The tensor named " + dst + " does not exist.");
+
+    copy_quantparam(nodes.src, nodes.dst);
+
+    INFO(l) << "Quantparam of " << src << " is copied to " << dst << std::endl;
+  }
+
+  INFO(l) << "CopyQuantParamPass End" << std::endl;
+
+  return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldGatherPass.cpp b/compiler/luci/pass/src/FoldGatherPass.cpp
new file mode 100644 (file)
index 0000000..f179d74
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldGatherPass.h"
+#include "CircleOptimizerUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+
+namespace
+{
+
+/**
+ * Fold to const if
+ *
+ * 1. params: const and dtype = S32 or S64
+ * 2. indices: const and dtype = S32 or S64
+ *
+ * BEFORE
+ *
+ *    [CircleConst]              [CircleConst]
+ *         |                          |
+ *         +---------[Gather]---------+
+ *
+ * AFTER
+ *
+ *                [CircleConst]
+ *
+ **/
+template <loco::DataType InputT, loco::DataType IndexT>
+bool fold_gather(luci::CircleGather *gather_node)
+{
+  const auto params = loco::must_cast<luci::CircleConst *>(gather_node->params());
+  const auto indices = loco::must_cast<luci::CircleConst *>(gather_node->indices());
+
+  const auto rank = params->rank();
+  auto axis = gather_node->axis();
+  if (axis < 0)
+  {
+    axis += static_cast<int32_t>(rank);
+  }
+
+  if (axis < 0 or axis >= static_cast<int32_t>(rank))
+    throw std::runtime_error("Unsupported axis value");
+
+  const auto name = gather_node->name();
+  assert(name.length() > 0);
+
+  auto constant = gather_node->graph()->nodes()->create<luci::CircleConst>();
+  constant->dtype(InputT);
+  constant->name(name + "_folded");
+
+  constant->rank(rank + indices->rank() - 1);
+
+  assert(constant->rank() > 0);
+
+  std::vector<uint32_t> shape;
+  for (uint32_t i = 0; i < rank; ++i)
+  {
+    if (i != static_cast<uint32_t>(axis))
+    {
+      const auto dim = params->dim(i).value();
+      shape.push_back(dim);
+    }
+    else
+    {
+      for (uint32_t j = 0; j < indices->rank(); ++j)
+      {
+        const auto dim = indices->dim(j).value();
+        shape.push_back(dim);
+      }
+    }
+  }
+
+  uint32_t size = 1;
+  for (uint32_t i = 0; i < shape.size(); ++i)
+  {
+    constant->dim(i).set(shape.at(i));
+    size *= shape.at(i);
+  }
+
+  constant->size<InputT>(size);
+
+  uint32_t outer_size = 1;
+  for (uint32_t i = 0; i < static_cast<uint32_t>(axis); ++i)
+  {
+    outer_size *= params->dim(i).value();
+  }
+
+  uint32_t inner_size = 1;
+  for (uint32_t i = axis + 1; i < rank; ++i)
+  {
+    inner_size *= params->dim(i).value();
+  }
+
+  uint32_t coord_size = 1;
+  for (uint32_t i = 0; i < indices->rank(); ++i)
+  {
+    coord_size *= indices->dim(i).value();
+  }
+
+  const auto axis_size = params->dim(axis).value();
+
+  for (uint32_t outer = 0; outer < outer_size; ++outer)
+  {
+    for (uint32_t i = 0; i < coord_size; ++i)
+    {
+      constant->at<InputT>((outer * coord_size + i) * inner_size) =
+        params->at<InputT>((outer * axis_size + indices->at<IndexT>(i)) * inner_size);
+    }
+  }
+  loco::replace(gather_node).with(constant);
+
+  return true;
+}
+
+bool fold_gather(luci::CircleGather *gather_node)
+{
+  const auto params = dynamic_cast<luci::CircleConst *>(gather_node->params());
+  if (not params)
+    return false;
+
+  const auto indices = dynamic_cast<luci::CircleConst *>(gather_node->indices());
+  if (not indices)
+    return false;
+
+  // TODO: support more types
+  if (params->dtype() != loco::DataType::S32 and params->dtype() != loco::DataType::S64)
+    return false;
+
+  if (indices->dtype() != loco::DataType::S32 and indices->dtype() != loco::DataType::S64)
+    throw std::runtime_error("Unsupported type");
+
+  if (params->dtype() == loco::DataType::S64)
+  {
+    if (indices->dtype() == loco::DataType::S64)
+      return fold_gather<loco::DataType::S64, loco::DataType::S64>(gather_node);
+    else
+      return fold_gather<loco::DataType::S64, loco::DataType::S32>(gather_node);
+  }
+  else
+  {
+    if (indices->dtype() == loco::DataType::S64)
+      return fold_gather<loco::DataType::S32, loco::DataType::S64>(gather_node);
+    else
+      return fold_gather<loco::DataType::S32, loco::DataType::S32>(gather_node);
+  }
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for Gather Op
+ **/
+bool FoldGatherPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    if (auto gather_node = dynamic_cast<luci::CircleGather *>(node))
+    {
+      if (fold_gather(gather_node))
+        changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldGatherPass.test.cpp b/compiler/luci/pass/src/FoldGatherPass.test.cpp
new file mode 100644 (file)
index 0000000..b02c034
--- /dev/null
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldGatherPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ *
+ *  Graph that has a Gather S64 Op with const inputs
+ *
+ *    BEFORE
+ *    params: [Const] (shape: [3], values: [1, 2, 3])
+ *    indices: [Const] (shape: [1], values: [1])
+ *
+ *     [params]     [indices]
+ *        |            |
+ *        ---[Gather]---
+ *
+ *    AFTER
+ *    [Const] (shape: [1], values: [2])
+ *
+ */
+class S64FoldGatherSimpleTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+  S64FoldGatherSimpleTest() : luci::ConstantFoldingAddTestGraph({1}, loco::DataType::S64) {}
+
+  virtual void SetUp() { init(); }
+
+  loco::Node *createFoldedPattern() override
+  {
+    _gather = _g.nodes()->create<luci::CircleGather>();
+    _params = _g.nodes()->create<luci::CircleConst>();
+    _indices = _g.nodes()->create<luci::CircleConst>();
+
+    _gather->dtype(loco::DataType::S64);
+    _params->dtype(loco::DataType::S64);
+    _indices->dtype(loco::DataType::S64);
+
+    _params->shape({3});
+    _indices->shape({1});
+
+    _params->size<loco::DataType::S64>(3);
+    _params->at<loco::DataType::S64>(0) = 1;
+    _params->at<loco::DataType::S64>(1) = 2;
+    _params->at<loco::DataType::S64>(2) = 3;
+
+    _indices->size<loco::DataType::S64>(1);
+    _indices->at<loco::DataType::S64>(0) = 1;
+
+    _gather->params(_params);
+    _gather->indices(_indices);
+
+    _gather->name("gather");
+    _params->name("params");
+    _indices->name("indices");
+
+    return _gather;
+  }
+
+protected:
+  luci::CircleGather *_gather = nullptr;
+  luci::CircleConst *_params = nullptr;
+  luci::CircleConst *_indices = nullptr;
+};
+
+/**
+ *
+ *  Graph that has a Gather S32 Op with axis = 1 and with const inputs
+ *
+ *    BEFORE
+ *    params: [Const] (shape: [2, 3], values: [0, 1, 2, 3, 4, 5])
+ *    indices: [Const] (shape: [2], values: [2, 1])
+ *
+ *     [params]     [indices]
+ *        |            |
+ *        ---[Gather]---
+ *
+ *    AFTER
+ *    [Const] (shape: [2, 2], values: [2, 1, 5, 4])
+ *
+ */
+
+class S32FoldGatherTwoDimsTest : public luci::ConstantFoldingAddTestGraph, public ::testing::Test
+{
+public:
+  S32FoldGatherTwoDimsTest() : luci::ConstantFoldingAddTestGraph({4, 2}, loco::DataType::S32) {}
+
+  virtual void SetUp() { init(); }
+
+  loco::Node *createFoldedPattern() override
+  {
+    _gather = _g.nodes()->create<luci::CircleGather>();
+    _params = _g.nodes()->create<luci::CircleConst>();
+    _indices = _g.nodes()->create<luci::CircleConst>();
+
+    _gather->dtype(loco::DataType::S32);
+    _params->dtype(loco::DataType::S32);
+    _indices->dtype(loco::DataType::S32);
+
+    _params->shape({2, 3});
+    _indices->shape({2});
+
+    _params->size<loco::DataType::S32>(6);
+    _params->at<loco::DataType::S32>(0) = 0;
+    _params->at<loco::DataType::S32>(1) = 1;
+    _params->at<loco::DataType::S32>(2) = 2;
+    _params->at<loco::DataType::S32>(3) = 3;
+    _params->at<loco::DataType::S32>(4) = 4;
+    _params->at<loco::DataType::S32>(5) = 5;
+
+    _indices->size<loco::DataType::S32>(2);
+    _indices->at<loco::DataType::S32>(0) = 2;
+    _indices->at<loco::DataType::S32>(1) = 1;
+
+    _gather->params(_params);
+    _gather->indices(_indices);
+
+    _gather->axis(1);
+
+    _gather->name("gather");
+    _params->name("params");
+    _indices->name("indices");
+
+    return _gather;
+  }
+
+protected:
+  luci::CircleGather *_gather = nullptr;
+  luci::CircleConst *_params = nullptr;
+  luci::CircleConst *_indices = nullptr;
+};
+
+} // namespace
+
+TEST(FoldGatherTest, name)
+{
+  luci::FoldGatherPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(S64FoldGatherSimpleTest, fold_gather_simple)
+{
+  luci::FoldGatherPass pass;
+  while (pass.run(graph()))
+    ;
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+  EXPECT_EQ(loco::DataType::S64, folded_const->dtype());
+  EXPECT_EQ(1, folded_const->rank());
+  EXPECT_EQ(1, folded_const->dim(0).value());
+  EXPECT_EQ(2, folded_const->at<loco::DataType::S64>(0));
+}
+
+TEST_F(S32FoldGatherTwoDimsTest, fold_gather_with_two_dim)
+{
+  luci::FoldGatherPass pass;
+  while (pass.run(graph()))
+    ;
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_NE(nullptr, folded_const);
+
+  // Check type, shape, values of folded const
+  EXPECT_EQ(loco::DataType::S32, folded_const->dtype());
+  EXPECT_EQ(2, folded_const->rank());
+  EXPECT_EQ(2, folded_const->dim(0).value());
+  EXPECT_EQ(2, folded_const->dim(1).value());
+
+  EXPECT_EQ(2, folded_const->at<loco::DataType::S32>(0));
+  EXPECT_EQ(1, folded_const->at<loco::DataType::S32>(1));
+  EXPECT_EQ(5, folded_const->at<loco::DataType::S32>(2));
+  EXPECT_EQ(4, folded_const->at<loco::DataType::S32>(3));
+}
+
+TEST_F(S64FoldGatherSimpleTest, illegal_input_NEG)
+{
+  _indices->dtype(loco::DataType::FLOAT32);
+
+  luci::FoldGatherPass pass;
+  EXPECT_ANY_THROW(pass.run(graph()));
+}
+
+TEST_F(S64FoldGatherSimpleTest, illegal_axis_NEG)
+{
+  _gather->axis(1);
+
+  luci::FoldGatherPass pass;
+  EXPECT_ANY_THROW(pass.run(graph()));
+}
index de973a4311f310767442a21edd266092d62a8331..68136b24427be437371b36493a92a7c72e7dce82 100644 (file)
@@ -186,12 +186,12 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
   // (1) normal case: qparam is propagated to input_1 and input_2
   // (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to
   // input_2
-  // (3) subsequent concat: input_1 is concat. qparam is propagated only to input_2
+  // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat
   // (4) const input: input_1 is const. constant values are quantized
 
   // normal case: qparam of concat_node is propagated to input_1 and input_2
   SimpleConcatGraph g(loco::DataType::U8);
-  luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::U8);
+  luci::propagate_concat_quantparam(&g.concat_node);
   EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]);
@@ -202,7 +202,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
   // input_1 is an input of input_2. qparam is propagated only to input_2
   SimpleConcatGraph g2(loco::DataType::U8);
   g2.input_2.input(&g2.input_1);
-  luci::propagate_concat_quantparam(&g2.concat_node, loco::DataType::U8);
+  luci::propagate_concat_quantparam(&g2.concat_node);
   EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(77, g2.concat_node.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]);
@@ -210,19 +210,19 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8)
   EXPECT_FLOAT_EQ(3.14, g2.input_2.quantparam()->scale[0]);
   EXPECT_EQ(77, g2.input_2.quantparam()->zerop[0]);
 
-  // input_1 is concat. qparam is propagated only to input_2
+  // input_1 is concat. qparam is propagated to subsequent concat
   SubsequentConcatGraph sg(loco::DataType::U8);
-  luci::propagate_concat_quantparam(&sg.concat_node, loco::DataType::U8);
+  luci::propagate_concat_quantparam(&sg.concat_node);
   EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(77, sg.concat_node.quantparam()->zerop[0]);
-  EXPECT_FLOAT_EQ(1.0, sg.input_1.quantparam()->scale[0]);
-  EXPECT_EQ(1, sg.input_1.quantparam()->zerop[0]);
+  EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]);
+  EXPECT_EQ(77, sg.input_1.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]);
   EXPECT_EQ(77, sg.input_2.quantparam()->zerop[0]);
 
   // input_1 is const. const values are quantized with the qparam of concat
   ConstInputConcatGraph cg(loco::DataType::U8);
-  luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+  luci::propagate_concat_quantparam(cg.concat_node);
   EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
   EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
   const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
@@ -248,7 +248,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
 
   // concat has fused activation function
   g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
-  luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::U8);
+  luci::propagate_concat_quantparam(&g.concat_node);
   EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(77, g.concat_node.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]);
@@ -261,7 +261,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_u8_NEG)
   // const values are quantized using its min/max
   ConstInputConcatGraph cg(loco::DataType::U8);
   cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
-  luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::U8);
+  luci::propagate_concat_quantparam(cg.concat_node);
   EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
   EXPECT_EQ(10, cg.concat_node->quantparam()->zerop[0]);
   const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
@@ -283,12 +283,12 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
   // (1) normal case: qparam is propagated to input_1 and input_2
   // (2) input used by other Op: input_1 is an input of input_2. qparam is propagated only to
   // input_2
-  // (3) subsequent concat: input_1 is concat. qparam is propagated only to input_2
+  // (3) subsequent concat: input_1 is concat. qparam is propagated to subsequent concat
   // (4) const input: input_1 is const. constant values are quantized
 
   // normal case: qparam of concat_node is propagated to input_1 and input_2
   SimpleConcatGraph g(loco::DataType::S16);
-  luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::S16);
+  luci::propagate_concat_quantparam(&g.concat_node);
   EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(3.14, g.input_1.quantparam()->scale[0]);
@@ -299,7 +299,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
   // input_1 is an input of input_2. qparam is propagated only to input_2
   SimpleConcatGraph g2(loco::DataType::S16);
   g2.input_2.input(&g2.input_1);
-  luci::propagate_concat_quantparam(&g2.concat_node, loco::DataType::S16);
+  luci::propagate_concat_quantparam(&g2.concat_node);
   EXPECT_FLOAT_EQ(3.14, g2.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(0, g2.concat_node.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(1.0, g2.input_1.quantparam()->scale[0]);
@@ -309,17 +309,17 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16)
 
   // input_1 is concat. qparam is propagated only to input_2
   SubsequentConcatGraph sg(loco::DataType::S16);
-  luci::propagate_concat_quantparam(&sg.concat_node, loco::DataType::S16);
+  luci::propagate_concat_quantparam(&sg.concat_node);
   EXPECT_FLOAT_EQ(3.14, sg.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(0, sg.concat_node.quantparam()->zerop[0]);
-  EXPECT_FLOAT_EQ(1.0, sg.input_1.quantparam()->scale[0]);
+  EXPECT_FLOAT_EQ(3.14, sg.input_1.quantparam()->scale[0]);
   EXPECT_EQ(0, sg.input_1.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(3.14, sg.input_2.quantparam()->scale[0]);
   EXPECT_EQ(0, sg.input_2.quantparam()->zerop[0]);
 
   // input_1 is const. const values are quantized with the qparam of concat
   ConstInputConcatGraph cg(loco::DataType::S16);
-  luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+  luci::propagate_concat_quantparam(cg.concat_node);
   EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
   EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
   const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
@@ -345,7 +345,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
 
   // concat has fused activation function
   g.concat_node.fusedActivationFunction(luci::FusedActFunc::RELU);
-  luci::propagate_concat_quantparam(&g.concat_node, loco::DataType::S16);
+  luci::propagate_concat_quantparam(&g.concat_node);
   EXPECT_FLOAT_EQ(3.14, g.concat_node.quantparam()->scale[0]);
   EXPECT_EQ(0, g.concat_node.quantparam()->zerop[0]);
   EXPECT_FLOAT_EQ(1.0, g.input_1.quantparam()->scale[0]);
@@ -358,7 +358,7 @@ TEST(PropagateConcatenationQparam, propagate_concat_quantparam_i16_NEG)
   // const values are quantized using its min/max
   ConstInputConcatGraph cg(loco::DataType::S16);
   cg.concat_node->fusedActivationFunction(luci::FusedActFunc::RELU);
-  luci::propagate_concat_quantparam(cg.concat_node, loco::DataType::S16);
+  luci::propagate_concat_quantparam(cg.concat_node);
   EXPECT_FLOAT_EQ(0.1, cg.concat_node->quantparam()->scale[0]);
   EXPECT_EQ(0, cg.concat_node->quantparam()->zerop[0]);
   const auto cg_input_1 = loco::must_cast<luci::CircleConst *>(cg.concat_node->values(0));
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.cpp
new file mode 100644 (file)
index 0000000..b497548
--- /dev/null
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+
+namespace
+{
+
+void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
+                        loco::DataType quant_type)
+{
+  uint32_t size = const_node->size<loco::DataType::FLOAT32>();
+
+  const float scaling_factor_inv = 1.0 / scaling_factor;
+  std::vector<int32_t> quantized_values(size);
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i));
+    double quantized_data = std::round(data * scaling_factor_inv) + zerop;
+    constexpr double int_max = static_cast<double>(std::numeric_limits<int32_t>::max());
+    constexpr double int_min = static_cast<double>(std::numeric_limits<int32_t>::min());
+    quantized_data = std::min(int_max, std::max(int_min, quantized_data));
+
+    quantized_values[i] = static_cast<int32_t>(quantized_data);
+  }
+
+  switch (quant_type)
+  {
+    case loco::DataType::U8:
+      const_node->dtype(loco::DataType::U8);      // change the type of tensor
+      const_node->size<loco::DataType::U8>(size); // resize tensor
+      for (uint32_t i = 0; i < size; ++i)
+        const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
+      break;
+    case loco::DataType::S16:
+      assert(zerop == 0);
+      const_node->dtype(loco::DataType::S16);      // change the type of tensor
+      const_node->size<loco::DataType::S16>(size); // resize tensor
+      for (uint32_t i = 0; i < size; ++i)
+        const_node->at<loco::DataType::S16>(i) =
+          std::min(32767, std::max(-32767, quantized_values[i]));
+      break;
+    default:
+      throw std::runtime_error("Unsupported data type");
+  }
+}
+
+void overwrite_quantparam(const luci::CircleNode *source, luci::CircleNode *target)
+{
+  auto source_qparam = source->quantparam();
+  if (source_qparam == nullptr)
+    throw std::runtime_error("source quantparam is not found during overwrite");
+
+  auto target_qparam = target->quantparam();
+  if (target_qparam == nullptr)
+  {
+    auto quantparam = std::make_unique<luci::CircleQuantParam>();
+    target->quantparam(std::move(quantparam));
+    target_qparam = target->quantparam();
+
+    if (target_qparam == nullptr)
+      throw std::runtime_error("Creating new quant param failed");
+  }
+  target_qparam->min = source_qparam->min;
+  target_qparam->max = source_qparam->max;
+  target_qparam->scale = source_qparam->scale;
+  target_qparam->zerop = source_qparam->zerop;
+  target_qparam->quantized_dimension = source_qparam->quantized_dimension;
+}
+
+/**
+ * Tells if pad_v2 quantization should ignore padding value
+ * In that case padding const will be quantized with input parameters, and probably clipped
+ */
+bool ignore_pad_v2_const_quantization(const luci::CirclePadV2 *pad)
+{
+  // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly
+  // TODO use metadata hints to detect this case
+  auto const_value_node = dynamic_cast<const luci::CircleConst *>(pad->arg(2));
+  if (!const_value_node)
+    return false;
+  if (const_value_node->dtype() == loco::DataType::FLOAT32)
+  {
+    float const_value = const_value_node->at<loco::DataType::FLOAT32>(0);
+    if (const_value == std::numeric_limits<float>::lowest())
+      return true;
+  }
+  return false;
+}
+
+/** EXAMPLE
+ *
+ * BEFORE
+ *
+ *         [CircleNode]       [CircleConst]
+ *           (qparam1)           (FP32)
+ *                   \            /
+ *                    \          /
+ *                    [CirclePack]
+ *                     (qparam2)
+ *
+ *  AFTER
+ *
+ *         [CircleNode]        [CircleConst]   [CircleConst] <- Dead node
+ *           (qparam2)           (qparam2)         (FP32)
+ *                   \            /
+ *                    \          /
+ *                    [CirclePack]
+ *                     (qparam2)
+ *
+ * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs.
+ */
+void propagate_pack_quantparam(luci::CirclePack *pack)
+{
+  assert(pack->quantparam() != nullptr);
+
+  const auto num_inputs = pack->values_count();
+
+  for (uint32_t i = 0; i < num_inputs; i++)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i));
+
+    // Quantize constant values
+    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+    {
+      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+      if (const_node->dtype() != loco::DataType::FLOAT32)
+        throw std::runtime_error("Unsupported data type for constant input of pack Op");
+
+      const auto pack_qparam = pack->quantparam();
+      if (pack_qparam == nullptr)
+        throw std::runtime_error("quantparam of pack is not found during propagation");
+
+      assert(pack_qparam->scale.size() == 1);
+      assert(pack_qparam->zerop.size() == 1);
+      const auto scaling_factor = pack_qparam->scale[0];
+      const auto zerop = pack_qparam->zerop[0];
+
+      auto new_const = luci::clone(const_node);
+      quant_const_values(new_const, scaling_factor, zerop, pack->dtype());
+      pack->values(i, new_const);
+      overwrite_quantparam(pack, new_const);
+    }
+    else
+    {
+      const auto succs = loco::succs(node);
+      if (succs.size() > 1)
+        continue;
+
+      // Non-const input must have been quantized
+      assert(node->quantparam() != nullptr);
+      overwrite_quantparam(pack, node);
+    }
+  }
+}
+
+/** EXAMPLE
+ *
+ *
+ *
+ * BEFORE
+ *
+ *      [CircleNode] [CircleConst] [CircleConst] [CircleNode]
+ *          (S32)        (S32)        (FP32)     (U8 qparam1)
+ *              \          \           /            /
+ *               \          \        /            /
+ *                \          \     /            /
+ *                 -------[CircleOneHot]-------
+ *                         (U8 qparam2)
+ *
+ *  AFTER
+ *
+ *      [CircleNode] [CircleConst] [CircleConst] [CircleNode]      [CircleConst] <- Dead node
+ *          (S32)        (S32)     (U8 qparam2)  (U8 qparam2)         (FP32)
+ *              \          \           /           /
+ *               \          \        /            /
+ *                \          \     /            /
+ *                 -------[CircleOneHot]-------
+ *                         (U8 qparam2)
+ *
+ * NOTE Quantization parameter of CircleOneHot (qparam2) is propagated to on_value/off_value.
+ */
+void propagate_one_hot_quantparam(luci::CircleOneHot *one_hot)
+{
+  assert(one_hot->quantparam() != nullptr);
+
+  // Propagate quantization parameters from output to inputs,
+  // to fit both input and constant_value in one quant range.
+  auto quant_input = [one_hot](void (luci::CircleOneHot::*arg_setter)(loco::Node *),
+                               loco::Node *(luci::CircleOneHot::*arg_getter)() const) {
+    auto node = loco::must_cast<luci::CircleNode *>((one_hot->*arg_getter)());
+
+    // Quantize constant values
+    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+    {
+      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+      if (is_quantized(const_node))
+        return;
+
+      if (const_node->dtype() != loco::DataType::FLOAT32)
+        throw std::runtime_error("Unsupported data type for constant input of OneHot Op");
+
+      const auto qparam = one_hot->quantparam();
+      if (qparam == nullptr)
+        throw std::runtime_error("quantparam of OneHot is not found during propagation");
+
+      assert(qparam->scale.size() == 1);
+      const auto scaling_factor = qparam->scale.at(0);
+      const auto zerop = qparam->zerop.at(0);
+
+      auto new_const = luci::clone(const_node);
+      quant_const_values(new_const, scaling_factor, zerop, one_hot->dtype());
+      overwrite_quantparam(one_hot, new_const);
+      (one_hot->*arg_setter)(new_const);
+    }
+    else
+    {
+      const auto succs = loco::succs(node);
+      if (succs.size() > 1)
+        return;
+
+      // Non-const input must have been quantized
+      assert(node->quantparam() != nullptr);
+      overwrite_quantparam(one_hot, node);
+    }
+  };
+
+  quant_input(&luci::CircleOneHot::on_value, &luci::CircleOneHot::on_value);
+  quant_input(&luci::CircleOneHot::off_value, &luci::CircleOneHot::off_value);
+}
+
+} // namespace
+
+namespace luci
+{
+
+/** BEFORE
+ *
+ *         [CircleNode]             [CircleConst]
+ *         (U8 qparam1)                 (FP32)
+ *                   \                    /
+ *                    \                  /
+ *                    [CircleConcatenation]
+ *                        (U8 qparam2)
+ *
+ *  AFTER
+ *         [CircleNode]             [CircleConst]   [CircleConst] <- Dead node
+ *         (U8 qparam2)             (U8 qparam2)       (FP32)
+ *                   \                    /
+ *                    \                  /
+ *                    [CircleConcatenation]
+ *                        (U8 qparam2)
+ *
+ * Concat's own qparam (qparam2) is propagated backward to all of its inputs.
+ * Const inputs are re-quantized with qparam2 on a clone; the original FP32
+ * const becomes a dead node.
+ */
+void propagate_concat_quantparam(luci::CircleConcatenation *concat)
+{
+  assert(concat->quantparam() != nullptr);
+
+  const auto num_inputs = concat->numValues();
+
+  // Quantize const inputs using their values if concat has fused act function.
+  // In that case the output qparam is NOT propagated to the inputs; each const
+  // is quantized from its own value range instead.
+  if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
+  {
+    for (uint32_t i = 0; i < num_inputs; i++)
+    {
+      auto node = concat->arg(i);
+      auto const_node = dynamic_cast<luci::CircleConst *>(node);
+      if (const_node != nullptr)
+      {
+        // Clone so other users of the original const are unaffected
+        auto new_const = luci::clone(const_node);
+        quant_const(new_const, concat->dtype());
+        concat->values(i, new_const);
+      }
+    }
+    return;
+  }
+
+  for (uint32_t i = 0; i < num_inputs; i++)
+  {
+    auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));
+
+    // Quantize constant values with concat's (single-channel) qparam
+    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+    {
+      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+
+      const auto concat_qparam = concat->quantparam();
+      assert(concat_qparam->scale.size() == 1);
+      const auto scaling_factor = concat_qparam->scale[0];
+      const auto zerop = concat_qparam->zerop[0];
+
+      auto new_const = luci::clone(const_node);
+      quant_const_values(new_const, scaling_factor, zerop, concat->dtype());
+      concat->values(i, new_const);
+      overwrite_quantparam(concat, new_const);
+    }
+    else
+    {
+      // Skip inputs shared with other consumers; overwriting their qparam
+      // would also change what those consumers see
+      const auto succs = loco::succs(node);
+      if (succs.size() > 1)
+        continue;
+
+      // Non-const input must have been quantized
+      assert(node->quantparam() != nullptr);
+      overwrite_quantparam(concat, node);
+    }
+  }
+}
+
+/** BEFORE
+ *
+ *         [CircleNode] [CircleConst] [CircleConst]
+ *         (U8 qparam1)     (S32)       (FP32)
+ *                   \        |         /
+ *                    \       |        /
+ *                      [CirclePadV2]
+ *                       (U8 qparam2)
+ *
+ *  AFTER (case 1)
+ *
+ *  By default qparam is propagated from output to inputs to meet backend requirements.
+ *
+ *         [CircleNode] [CircleConst] [CircleConst]   [CircleConst] <- Dead node
+ *         (U8 qparam2)     (S32)      (U8 qparam2)       (FP32)
+ *                   \        |         /
+ *                    \       |        /
+ *                      [CirclePadV2]
+ *                       (U8 qparam2)
+ *
+ *  AFTER (case 2)
+ *
+ * In case padded value is the lowest float value
+ * Qparam is propagated from input to output and constant.
+ *
+ * This is a special case for pads constructed during optimization, needed to
+ * guarantee that an extremely large negative constant does not stretch the
+ * output quantization range.
+ *
+ *         [CircleNode] [CircleConst] [CircleConst]   [CircleConst] <- Dead node
+ *         (U8 qparam1)     (S32)      (U8 qparam1)       (FP32)
+ *                   \        |         /
+ *                    \       |        /
+ *                      [CirclePadV2]
+ *                       (U8 qparam1)
+ */
+void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2)
+{
+  if (ignore_pad_v2_const_quantization(pad_v2))
+  {
+    // Case 2: propagate input quantization parameters from input to output
+    // and to the padding const value
+    auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
+    overwrite_quantparam(pad_v2_input, pad_v2);
+
+    auto const_value_node = loco::must_cast<luci::CircleConst *>(
+      pad_v2->arg(2)); // NOTE(review): cast presumes ignore_pad_v2_const_quantization
+                       // only returns true when arg(2) is a CircleConst — confirm
+    auto new_const = luci::clone(const_value_node);
+
+    const auto pad_v2_input_qparam = pad_v2_input->quantparam();
+    assert(pad_v2_input_qparam != nullptr);
+    assert(pad_v2_input_qparam->scale.size() == 1);
+    const auto scaling_factor = pad_v2_input_qparam->scale.at(0);
+    const auto zerop = pad_v2_input_qparam->zerop.at(0);
+
+    quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype());
+    // The cloned const receives the same qparam as the input
+    overwrite_quantparam(pad_v2_input, new_const);
+    pad_v2->constant_values(new_const);
+    return;
+  }
+
+  // Case 1: propagate quantization parameters from output to inputs,
+  // to fit both input and constant_values in one quant range.
+  auto quant_input = [pad_v2](void (CirclePadV2::*arg_setter)(loco::Node *), uint32_t arg) {
+    auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg));
+
+    // Quantize constant values
+    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+    {
+      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
+      if (is_quantized(const_node))
+        return;
+
+      if (const_node->dtype() != loco::DataType::FLOAT32)
+        throw std::runtime_error("Unsupported data type for constant input of PadV2 Op");
+
+      const auto pad_v2_qparam = pad_v2->quantparam();
+      if (pad_v2_qparam == nullptr)
+        throw std::runtime_error("quantparam of PadV2 is not found during propagation");
+
+      assert(pad_v2_qparam->scale.size() == 1);
+      const auto scaling_factor = pad_v2_qparam->scale.at(0);
+      const auto zerop = pad_v2_qparam->zerop.at(0);
+
+      auto new_const = luci::clone(const_node);
+      quant_const_values(new_const, scaling_factor, zerop, pad_v2->dtype());
+      overwrite_quantparam(pad_v2, new_const);
+      (pad_v2->*arg_setter)(new_const);
+    }
+    else
+    {
+      // Skip inputs shared with other consumers (see concat handling)
+      const auto succs = loco::succs(node);
+      if (succs.size() > 1)
+        return;
+
+      // Non-const input must have been quantized
+      assert(node->quantparam() != nullptr);
+      overwrite_quantparam(pad_v2, node);
+    }
+  };
+
+  // arg(1) (paddings, S32 per the diagram above) is left untouched
+  quant_input(&CirclePadV2::input, 0);
+  quant_input(&CirclePadV2::constant_values, 2);
+}
+
+} // namespace luci
+
+namespace
+{
+
+// Visitor to propagate quantization parameters backwards.
+// Default visit() is a no-op; only the Ops whose inputs must share the
+// output's qparam (Concat / OneHot / Pack / PadV2) are handled.
+struct PropagateQParamBackward final : public luci::CircleNodeMutableVisitor<void>
+{
+  void visit(luci::CircleNode *) {}
+
+  void visit(luci::CircleConcatenation *node) { propagate_concat_quantparam(node); }
+
+  void visit(luci::CircleOneHot *node) { propagate_one_hot_quantparam(node); }
+
+  void visit(luci::CirclePack *node) { propagate_pack_quantparam(node); }
+
+  void visit(luci::CirclePadV2 *node) { propagate_pad_v2_quantparam(node); }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool PropagateQParamBackwardPass::run(loco::Graph *g)
+{
+  LOGGER(l);
+
+  // We use reverse post-order traversal as qparam is propagated backward
+  // (from a node to its inputs), so each consumer is visited before its
+  // producers and propagated values can cascade upstream in a single pass
+  auto nodes = loco::postorder_traversal(loco::output_nodes(g));
+  std::reverse(nodes.begin(), nodes.end());
+  for (auto node : nodes)
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    INFO(l) << "PropagateQParamBackwardPass visit node: " << circle_node->name() << std::endl;
+
+    // We can't propagate non-existent qparam
+    if (circle_node->quantparam() == nullptr)
+      continue;
+
+    PropagateQParamBackward pqb;
+    circle_node->accept(&pqb);
+  }
+
+  // This pass is only run once, so return false
+  // TODO Refactoring not to return meaningless value
+  return false;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamBackwardPass.test.cpp
new file mode 100644 (file)
index 0000000..33af704
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+using namespace luci;
+
+namespace
+{
+
+// Attach a single-channel quantparam (one scale, one zero-point) to node
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+  auto qparam = std::make_unique<luci::CircleQuantParam>();
+  qparam->scale.emplace_back(scale);
+  qparam->zerop.emplace_back(zp);
+
+  node->quantparam(std::move(qparam));
+}
+
+/**
+ * @brief Base Test Graph
+ */
+struct TestGraph
+{
+public:
+  // Build the graph under test; implemented by each concrete test graph
+  virtual void init(void) = 0;
+};
+
+/**
+ *  Graph with two concats
+ *
+ *  [CircleInput]  [CircleConst]
+ *         \         /
+ *  [CircleConcatenation]  [CircleConst]
+ *           |                |
+ *          [CircleConcatenation]
+ *                  |
+ *            [CircleOutput]
+ *
+ *  BEFORE
+ *  - Concat1 and Concat 2 have different qparams
+ *
+ *  AFTER
+ *  - All Ops have the same qparam
+ */
+struct SubsequentConcatGraph : public TestGraph
+{
+public:
+  void init(void) final
+  {
+    // graph input and output
+    auto graph_input = g.inputs()->create();
+    auto graph_output = g.outputs()->create();
+
+    // input (U8, scale 1.0, zerop 1)
+    input = g.nodes()->create<luci::CircleInput>();
+    input->index(graph_input->index());
+    input->shape({1, 4, 4, 3});
+    input->dtype(loco::DataType::U8);
+    set_qparam(input, 1.0, 1);
+
+    // const1 (FP32, values 0..47) — not yet quantized
+    const1 = g.nodes()->create<luci::CircleConst>();
+    const1->shape({1, 4, 4, 3});
+    const1->dtype(loco::DataType::FLOAT32);
+    const1->size<loco::DataType::FLOAT32>(48);
+    for (uint32_t i = 0; i < 48; i++)
+      const1->at<loco::DataType::FLOAT32>(i) = i;
+
+    // concat1 (U8, scale 2.0, zerop 2)
+    concat1 = g.nodes()->create<luci::CircleConcatenation>(2);
+    concat1->shape({1, 4, 4, 6});
+    concat1->dtype(loco::DataType::U8);
+    set_qparam(concat1, 2.0, 2);
+    concat1->values(0, input);
+    concat1->values(1, const1);
+    concat1->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+    // const2 (FP32, values 0..47) — not yet quantized
+    const2 = g.nodes()->create<luci::CircleConst>();
+    const2->shape({1, 4, 4, 3});
+    const2->dtype(loco::DataType::FLOAT32);
+    const2->size<loco::DataType::FLOAT32>(48);
+    for (uint32_t i = 0; i < 48; i++)
+      const2->at<loco::DataType::FLOAT32>(i) = i;
+
+    // concat2 (U8, scale 3.0, zerop 3)
+    concat2 = g.nodes()->create<luci::CircleConcatenation>(2);
+    concat2->shape({1, 4, 4, 9});
+    concat2->dtype(loco::DataType::U8);
+    set_qparam(concat2, 3.0, 3);
+    concat2->values(0, concat1);
+    concat2->values(1, const2);
+    concat2->fusedActivationFunction(luci::FusedActFunc::NONE);
+
+    // output (same qparam as concat2)
+    output = g.nodes()->create<luci::CircleOutput>();
+    output->index(graph_output->index());
+    output->from(concat2);
+    output->shape({1, 4, 4, 9});
+    output->dtype(loco::DataType::U8);
+    set_qparam(output, 3.0, 3);
+  }
+
+public:
+  loco::Graph g;
+  CircleInput *input = nullptr;
+  CircleConcatenation *concat1 = nullptr;
+  CircleConcatenation *concat2 = nullptr;
+  CircleConst *const1 = nullptr;
+  CircleConst *const2 = nullptr;
+  CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateQParamBackwardPassTest, name)
+{
+  // The pass must expose a non-null name
+  luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST(PropagateQParamBackwardPassTest, subsequent_propagation)
+{
+  SubsequentConcatGraph graph;
+
+  graph.init();
+
+  luci::PropagateQParamBackwardPass pass(loco::DataType::U8);
+
+  pass.run(&graph.g);
+
+  // concat2's qparam (scale 3.0, zerop 3) must reach every node upstream,
+  // including the (cloned, now-quantized) constant inputs
+  EXPECT_EQ(3.0, graph.concat2->quantparam()->scale[0]);
+  EXPECT_EQ(3, graph.concat2->quantparam()->zerop[0]);
+
+  auto const2 = loco::must_cast<CircleNode *>(graph.concat2->values(1));
+  EXPECT_EQ(3.0, const2->quantparam()->scale[0]);
+  EXPECT_EQ(3, const2->quantparam()->zerop[0]);
+
+  EXPECT_EQ(3.0, graph.concat1->quantparam()->scale[0]);
+  EXPECT_EQ(3, graph.concat1->quantparam()->zerop[0]);
+
+  auto const1 = loco::must_cast<CircleNode *>(graph.concat1->values(1));
+  EXPECT_EQ(3.0, const1->quantparam()->scale[0]);
+  EXPECT_EQ(3, const1->quantparam()->zerop[0]);
+
+  EXPECT_EQ(3.0, graph.input->quantparam()->scale[0]);
+  EXPECT_EQ(3, graph.input->quantparam()->zerop[0]);
+}
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.cpp
new file mode 100644 (file)
index 0000000..003e4c2
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamForwardPass.h"
+
+#include "QuantizationUtils.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Log.h>
+
+#include <iostream>
+
+namespace
+{
+
+// Copy scale / zerop / quantized_dimension from src to dst.
+// Returns false (no change) when src and dst already hold identical qparams,
+// which lets the caller's fixed-point loop terminate.
+bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst)
+{
+  assert(src->scale.size() == dst->scale.size());
+  assert(src->zerop.size() == dst->zerop.size());
+
+  // src and dst have the same qparam
+  if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) &&
+      std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) &&
+      src->quantized_dimension == dst->quantized_dimension)
+    return false;
+
+  dst->scale.assign(src->scale.begin(), src->scale.end());
+  dst->zerop.assign(src->zerop.begin(), src->zerop.end());
+  dst->quantized_dimension = src->quantized_dimension;
+  return true;
+}
+
+// Node-level wrapper: copy qparam from src node to dst node.
+// Returns false when either node has no quantparam (nothing to propagate).
+bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst)
+{
+  // Skip nodes that do not have quantparams
+  auto src_qparam = src->quantparam();
+  if (not src_qparam)
+    return false;
+
+  auto dst_qparam = dst->quantparam();
+  if (not dst_qparam)
+    return false;
+
+  return copy_qparam(src_qparam, dst_qparam);
+}
+
+//  Visitor to propagate quantization parameters (forward: input -> node).
+//  Each visit() returns true iff the node's qparam actually changed, which
+//  drives the caller's run-until-no-change loop.
+struct PropagateQParamForward final : public luci::CircleNodeMutableVisitor<bool>
+{
+  PropagateQParamForward() = default;
+
+  // Default: nothing to propagate
+  bool visit(luci::CircleNode *) { return false; }
+
+  // The Ops below only move/reshape data, so they share their input's qparam
+
+  bool visit(luci::CircleGather *node)
+  {
+    auto input_node = loco::must_cast<luci::CircleNode *>(node->params());
+    return copy_qparam(input_node, node);
+  }
+
+  bool visit(luci::CircleReshape *node)
+  {
+    auto input_node = loco::must_cast<luci::CircleNode *>(node->tensor());
+    return copy_qparam(input_node, node);
+  }
+
+  bool visit(luci::CircleTranspose *node)
+  {
+    auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+    return copy_qparam(input_node, node);
+  }
+
+  bool visit(luci::CircleStridedSlice *node)
+  {
+    auto input_node = loco::must_cast<luci::CircleNode *>(node->input());
+    return copy_qparam(input_node, node);
+  }
+
+  // For multi-output Ops the *Out node copies from the parent Op's input
+
+  bool visit(luci::CircleSplitOut *node)
+  {
+    auto split = loco::must_cast<luci::CircleSplit *>(node->input());
+    auto input_node = loco::must_cast<luci::CircleNode *>(split->input());
+    return copy_qparam(input_node, node);
+  }
+
+  bool visit(luci::CircleSplitVOut *node)
+  {
+    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+    auto input_node = loco::must_cast<luci::CircleNode *>(splitv->input());
+    return copy_qparam(input_node, node);
+  }
+
+  bool visit(luci::CircleUnpackOut *node)
+  {
+    auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
+    auto input_node = loco::must_cast<luci::CircleNode *>(unpack->value());
+    return copy_qparam(input_node, node);
+  }
+
+  // Propagate qparam across Quantize op to ensure
+  // special qparams (pre-defined values, integer scale)
+  bool visit(luci::CircleQuantize *node)
+  {
+    auto input_node = loco::must_cast<luci::CircleNode *>(node->input());
+
+    // Skip if input_node is not quantized activation
+    if (input_node->dtype() != loco::DataType::U8 and input_node->dtype() != loco::DataType::S16)
+      return false;
+
+    // If input_node and node have the same dtype, Quantize op
+    // will do rescale, not requantize for mixed-precision
+    if (input_node->dtype() == node->dtype())
+      return false;
+
+    assert(node->dtype() == loco::DataType::U8 or node->dtype() == loco::DataType::S16);
+
+    auto prev_qparam = node->quantparam();
+    assert(prev_qparam);
+    assert(prev_qparam->scale.size() == 1);
+    assert(prev_qparam->zerop.size() == 1);
+
+    // Save the current values so an update can be detected afterwards
+    const auto prev_scale = prev_qparam->scale[0];
+    const auto prev_zerop = prev_qparam->zerop[0];
+
+    auto qtype = luci::activation_qtype(input_node);
+    switch (qtype)
+    {
+      case luci::ActivationQType::PreDefinedValue:
+        node->quantparam(luci::make_predefined_qparam(input_node->opcode(), node->dtype()));
+        break;
+      case luci::ActivationQType::IntScale:
+        luci::set_int_scale(node);
+        break;
+      default:
+        // Other activation qtypes: keep the existing qparam
+        break;
+    }
+
+    assert(node->quantparam());
+    assert(node->quantparam()->scale.size() == 1);
+    assert(node->quantparam()->zerop.size() == 1);
+
+    const auto scale = node->quantparam()->scale[0];
+    const auto zerop = node->quantparam()->zerop[0];
+
+    // Compare qparam with saved values to detect update
+    return scale != prev_scale or zerop != prev_zerop;
+  }
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool PropagateQParamForwardPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  LOGGER(l);
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    INFO(l) << "PropagateQParamForwardPass visit node: " << circle_node->name() << std::endl;
+
+    PropagateQParamForward pqp;
+    if (circle_node->accept(&pqp))
+      changed = true;
+
+    // When _TF_style_maxpool is set, MaxPool additionally shares its input's
+    // qparam. NOTE: this copy does not feed into `changed` — presumably
+    // intentional since it is idempotent, but worth confirming.
+    if (_TF_style_maxpool)
+    {
+      if (auto maxpool = dynamic_cast<luci::CircleMaxPool2D *>(node))
+      {
+        auto input = loco::must_cast<luci::CircleNode *>(maxpool->value());
+        copy_qparam(input, maxpool);
+      }
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp b/compiler/luci/pass/src/PropagateQParamForwardPass.test.cpp
new file mode 100644 (file)
index 0000000..a734c08
--- /dev/null
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/PropagateQParamForwardPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+// Attach a quantparam with the given (possibly per-channel) scales and
+// zero-points to a node that does not have one yet
+void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale,
+                   const std::vector<int64_t> &zp)
+{
+  assert(node->quantparam() == nullptr);
+
+  auto quantparam = std::make_unique<luci::CircleQuantParam>();
+  quantparam->scale = scale;
+  quantparam->zerop = zp;
+  node->quantparam(std::move(quantparam));
+}
+
+/**
+ *  Simple graph for test
+ *
+ *  BEFORE
+ *
+ *        [Conv] (qparam 1)
+ *           |
+ *       [Reshape] (qparam 2)
+ *
+ *  AFTER
+ *
+ *        [Conv] (qparam 1)
+ *           |
+ *       [Reshape] (qparam 1)
+ *
+ *  NOTE Propagation is forward (Conv -> Reshape): Reshape ends up with
+ *  Conv's qparam 1, as the `simple` test below asserts.
+ */
+class SimpleGraph
+{
+public:
+  SimpleGraph()
+  {
+    input = g.nodes()->create<luci::CircleInput>();
+    conv = g.nodes()->create<luci::CircleConv2D>();
+    reshape = g.nodes()->create<luci::CircleReshape>();
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_input = g.inputs()->create();
+    input->index(graph_input->index());
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    // qparam 1 on conv, qparam 2 on reshape (per-channel, 3 entries each)
+    addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20});
+    addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10});
+
+    conv->input(input);
+    reshape->tensor(conv);
+    output->from(reshape);
+  }
+
+public:
+  loco::Graph g;
+  luci::CircleInput *input = nullptr;
+  luci::CircleConv2D *conv = nullptr;
+  luci::CircleReshape *reshape = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+/**
+ *  Test graph for forward propagation in Quantize Op
+ *
+ *  BEFORE
+ *
+ *         [Tanh U8] (qparam 1 - pre-defined for U8)
+ *             |
+ *       [Quantize S16] (qparam 2 - not pre-defined value)
+ *
+ *  AFTER
+ *
+ *         [Tanh U8] (qparam 1 - pre-defined for U8)
+ *             |
+ *       [Quantize S16] (qparam 3 - pre-defined for S16)
+ *
+ */
+class TanhQuantizeGraph
+{
+public:
+  TanhQuantizeGraph()
+  {
+    input = g.nodes()->create<luci::CircleInput>();
+    tanh = g.nodes()->create<luci::CircleTanh>();
+    quantize = g.nodes()->create<luci::CircleQuantize>();
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_input = g.inputs()->create();
+    input->index(graph_input->index());
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    // Mixed precision: U8 Tanh feeding an S16 Quantize
+    tanh->dtype(loco::DataType::U8);
+    quantize->dtype(loco::DataType::S16);
+
+    addQuantParam(tanh, {2.0f / 256.0f}, {128}); // pre-defined qparam for U8
+    addQuantParam(quantize, {1.0}, {0});         // not pre-defined values
+
+    tanh->x(input);
+    quantize->input(tanh);
+    output->from(quantize);
+  }
+
+public:
+  loco::Graph g;
+  luci::CircleInput *input = nullptr;
+  luci::CircleTanh *tanh = nullptr;
+  luci::CircleQuantize *quantize = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+/**
+ *  Test graph for forward propagation in Quantize Op
+ *
+ *  BEFORE
+ *
+ *         [Floor U8] (qparam 1 - int scale)
+ *             |
+ *       [Quantize S16] (qparam 2 - not int scale)
+ *
+ *  AFTER
+ *
+ *         [Floor U8] (qparam 1 - int scale)
+ *             |
+ *       [Quantize S16] (qparam 3 - int scale)
+ *
+ */
+class FloorQuantizeGraph
+{
+public:
+  FloorQuantizeGraph()
+  {
+    input = g.nodes()->create<luci::CircleInput>();
+    floor = g.nodes()->create<luci::CircleFloor>();
+    quantize = g.nodes()->create<luci::CircleQuantize>();
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_input = g.inputs()->create();
+    input->index(graph_input->index());
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    // Mixed precision: U8 Floor feeding an S16 Quantize
+    floor->dtype(loco::DataType::U8);
+    quantize->dtype(loco::DataType::S16);
+
+    addQuantParam(floor, {4.0f}, {128}); // int scale
+    addQuantParam(quantize, {0.3}, {0}); // not int scale
+
+    floor->x(input);
+    quantize->input(floor);
+    output->from(quantize);
+  }
+
+public:
+  loco::Graph g;
+  luci::CircleInput *input = nullptr;
+  luci::CircleFloor *floor = nullptr;
+  luci::CircleQuantize *quantize = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+} // namespace
+
+TEST(PropagateQParamForwardPassTest, name)
+{
+  // The pass must expose a non-null name
+  luci::PropagateQParamForwardPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST(PropagateQParamForward, simple)
+{
+  SimpleGraph g;
+
+  // Run until the fixed point (no further qparam change)
+  luci::PropagateQParamForwardPass pass;
+  while (pass.run(&g.g))
+    ;
+
+  // Reshape must now carry conv's qparam (3 per-channel scales/zerops)
+  EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]);
+  EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]);
+  EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]);
+  EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]);
+  EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]);
+  EXPECT_EQ(20, g.reshape->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQParamForward, wrong_op_NEG)
+{
+  SimpleGraph g;
+  // Remove the Reshape: with no propagating Op, Conv keeps its own qparam
+  g.output->from(g.conv);
+  g.reshape->drop();
+
+  luci::PropagateQParamForwardPass pass;
+  while (pass.run(&g.g))
+    ;
+
+  EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]);
+  EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]);
+  EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]);
+  EXPECT_EQ(0, g.conv->quantparam()->zerop[0]);
+  EXPECT_EQ(10, g.conv->quantparam()->zerop[1]);
+  EXPECT_EQ(20, g.conv->quantparam()->zerop[2]);
+}
+
+TEST(PropagateQParamForward, tanh_predefined_value)
+{
+  TanhQuantizeGraph g;
+
+  luci::PropagateQParamForwardPass pass;
+  while (pass.run(&g.g))
+    ;
+
+  // Quantize receives the pre-defined S16 qparam for Tanh (scale 1/32768)
+  EXPECT_FLOAT_EQ(1.0f / 32768.0f, g.quantize->quantparam()->scale[0]);
+}
+
+TEST(PropagateQParamForward, floor_int_scale)
+{
+  FloorQuantizeGraph g;
+
+  luci::PropagateQParamForwardPass pass;
+  while (pass.run(&g.g))
+    ;
+
+  // Quantize's non-integer scale (0.3) is replaced with an integer scale (1.0)
+  EXPECT_FLOAT_EQ(1.0f, g.quantize->quantparam()->scale[0]);
+}
+
+TEST(PropagateQParamForward, same_dtype_NEG)
+{
+  FloorQuantizeGraph g;
+  // Make Quantize's dtype match its input (U8 -> U8)
+  g.quantize->dtype(loco::DataType::U8);
+
+  luci::PropagateQParamForwardPass pass;
+  while (pass.run(&g.g))
+    ;
+
+  // Qparam is not propagated as ifm/ofm of Quantize Op have the same dtype
+  EXPECT_FLOAT_EQ(0.3f, g.quantize->quantparam()->scale[0]);
+}
diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.cpp
deleted file mode 100644 (file)
index b1cb7a4..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/PropagateQuantParamPass.h"
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Log.h>
-
-#include <iostream>
-
-namespace
-{
-
-bool copy_qparam(luci::CircleQuantParam *src, luci::CircleQuantParam *dst)
-{
-  assert(src->scale.size() == dst->scale.size());
-  assert(src->zerop.size() == dst->zerop.size());
-
-  // src and dst have the same qparam
-  if (std::equal(src->scale.begin(), src->scale.end(), dst->scale.begin()) &&
-      std::equal(src->zerop.begin(), src->zerop.end(), dst->zerop.begin()) &&
-      src->quantized_dimension == dst->quantized_dimension)
-    return false;
-
-  dst->scale.assign(src->scale.begin(), src->scale.end());
-  dst->zerop.assign(src->zerop.begin(), src->zerop.end());
-  dst->quantized_dimension = src->quantized_dimension;
-  return true;
-}
-
-bool copy_qparam(luci::CircleNode *src, luci::CircleNode *dst)
-{
-  // Skip nodes that do not have quantparams
-  auto src_qparam = src->quantparam();
-  if (not src_qparam)
-    return false;
-
-  auto dst_qparam = dst->quantparam();
-  if (not dst_qparam)
-    return false;
-
-  return copy_qparam(src_qparam, dst_qparam);
-}
-
-//  Visitor to propagate quantization parameters
-struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool>
-{
-  PropagateQuantParam() = default;
-
-  bool visit(luci::CircleNode *) { return false; }
-
-  bool visit(luci::CircleReshape *node)
-  {
-    auto input = node->tensor();
-    if (loco::succs(input).size() != 1)
-      return false;
-
-    auto input_node = loco::must_cast<luci::CircleNode *>(input);
-    return copy_qparam(input_node, node);
-  }
-
-  bool visit(luci::CircleTranspose *node)
-  {
-    auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
-    return copy_qparam(input_node, node);
-  }
-
-  // TODO : Add more Ops (e.g., layout-changing Ops)
-};
-
-} // namespace
-
-namespace luci
-{
-
-bool PropagateQuantParamPass::run(loco::Graph *g)
-{
-  bool changed = false;
-  LOGGER(l);
-  for (auto node : loco::active_nodes(loco::output_nodes(g)))
-  {
-    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    INFO(l) << "PropagateQuantParamPass visit node: " << circle_node->name() << std::endl;
-
-    PropagateQuantParam pqp;
-    if (circle_node->accept(&pqp))
-      changed = true;
-  }
-
-  return changed;
-}
-
-} // namespace luci
diff --git a/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp b/compiler/luci/pass/src/PropagateQuantParamPass.test.cpp
deleted file mode 100644 (file)
index 0f15642..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "luci/Pass/PropagateQuantParamPass.h"
-
-#include <luci/IR/CircleNodes.h>
-
-#include <gtest/gtest.h>
-
-namespace
-{
-
-void addQuantParam(luci::CircleNode *node, const std::vector<float> &scale,
-                   const std::vector<int64_t> &zp)
-{
-  assert(node->quantparam() == nullptr);
-
-  auto quantparam = std::make_unique<luci::CircleQuantParam>();
-  quantparam->scale = scale;
-  quantparam->zerop = zp;
-  node->quantparam(std::move(quantparam));
-}
-
-/**
- *  Simple graph for test
- *
- *  BEFORE
- *
- *        [Conv] (qparam 1)
- *           |
- *       [Reshape] (qparam 2)
- *
- *  AFTER
- *
- *        [Conv] (qparam 2)
- *           |
- *       [Reshape] (qparam 2)
- *
- */
-class SimpleGraph
-{
-public:
-  SimpleGraph()
-  {
-    input = g.nodes()->create<luci::CircleInput>();
-    conv = g.nodes()->create<luci::CircleConv2D>();
-    reshape = g.nodes()->create<luci::CircleReshape>();
-    output = g.nodes()->create<luci::CircleOutput>();
-
-    auto graph_input = g.inputs()->create();
-    input->index(graph_input->index());
-    auto graph_output = g.outputs()->create();
-    output->index(graph_output->index());
-
-    addQuantParam(conv, {0.1, 0.2, 0.3}, {0, 10, 20});
-    addQuantParam(reshape, {0.2, 0.4, 0.6}, {-10, 0, 10});
-
-    conv->input(input);
-    reshape->tensor(conv);
-    output->from(reshape);
-  }
-
-public:
-  loco::Graph g;
-  luci::CircleInput *input;
-  luci::CircleConv2D *conv;
-  luci::CircleReshape *reshape;
-  luci::CircleOutput *output;
-};
-
-} // namespace
-
-TEST(PropagateQuantParamPassTest, name)
-{
-  luci::PropagateQuantParamPass pass;
-  auto const name = pass.name();
-  ASSERT_NE(nullptr, name);
-}
-
-TEST(PropagateQuantParam, simple)
-{
-  SimpleGraph g;
-
-  luci::PropagateQuantParamPass pass;
-  while (pass.run(&g.g))
-    ;
-
-  EXPECT_FLOAT_EQ(0.1, g.reshape->quantparam()->scale[0]);
-  EXPECT_FLOAT_EQ(0.2, g.reshape->quantparam()->scale[1]);
-  EXPECT_FLOAT_EQ(0.3, g.reshape->quantparam()->scale[2]);
-  EXPECT_EQ(0, g.reshape->quantparam()->zerop[0]);
-  EXPECT_EQ(10, g.reshape->quantparam()->zerop[1]);
-  EXPECT_EQ(20, g.reshape->quantparam()->zerop[2]);
-}
-
-TEST(PropagateQuantParam, wrong_op_NEG)
-{
-  SimpleGraph g;
-  g.output->from(g.conv);
-  g.reshape->drop();
-
-  luci::PropagateQuantParamPass pass;
-  while (pass.run(&g.g))
-    ;
-
-  EXPECT_FLOAT_EQ(0.1, g.conv->quantparam()->scale[0]);
-  EXPECT_FLOAT_EQ(0.2, g.conv->quantparam()->scale[1]);
-  EXPECT_FLOAT_EQ(0.3, g.conv->quantparam()->scale[2]);
-  EXPECT_EQ(0, g.conv->quantparam()->zerop[0]);
-  EXPECT_EQ(10, g.conv->quantparam()->zerop[1]);
-  EXPECT_EQ(20, g.conv->quantparam()->zerop[2]);
-}
index 2f6fed46ef4c942aca3d12f8e2cecd75ff24af42..ad86cedf4a828d2e8b8435884591409917d6b555 100644 (file)
@@ -33,43 +33,6 @@ bool is_quantized(const CircleNode *node)
           node->dtype() == loco::DataType::S64);  // bias (int16 quant)
 }
 
-// Check if node is weights of conv2d, depthwise_conv2d, or fully_connected layer
-bool is_weights(CircleNode *node)
-{
-  auto circle_const = dynamic_cast<CircleConst *>(node);
-  if (circle_const == nullptr)
-    return false;
-
-  auto succs = loco::succs(node);
-
-  // Node is weights if it is the weights of all of its successors
-  for (auto out : succs)
-  {
-    bool is_weights = false;
-
-    auto conv = dynamic_cast<CircleConv2D *>(out);
-    if (conv != nullptr && conv->filter() == circle_const)
-      is_weights = true;
-
-    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
-    if (dw_conv != nullptr && dw_conv->filter() == circle_const)
-      is_weights = true;
-
-    auto t_conv = dynamic_cast<CircleTransposeConv *>(out);
-    if (t_conv != nullptr && t_conv->filter() == circle_const && circle_const->rank() == 4)
-      is_weights = true;
-
-    auto fc = dynamic_cast<CircleFullyConnected *>(out);
-    if (fc != nullptr && fc->weights() == circle_const)
-      is_weights = true;
-
-    if (!is_weights)
-      return false;
-  }
-
-  return true;
-}
-
 uint8_t fp32_to_uint8_cast(float f)
 {
   assert(std::numeric_limits<uint8_t>::min() <= f);
@@ -77,7 +40,6 @@ uint8_t fp32_to_uint8_cast(float f)
   return static_cast<uint8_t>(f);
 }
 
-// Per-layer quantization of weights (const tensor) using given min/max values
 void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                              float &scaling_factor, int64_t &zp, float &nudged_min,
                                              float &nudged_max)
@@ -107,7 +69,6 @@ void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float
   }
 }
 
-// Per-layer quantization of weights (const tensor) using given min/max values
 void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                             float &scaling_factor, int64_t &zp, float &nudged_min,
                                             float &nudged_max)
@@ -315,4 +276,123 @@ uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices)
          indices[2] * dimension.dim(3).value() + indices[3];
 }
 
+ActivationQType activation_qtype(const CircleNode *node)
+{
+  auto fused_act_node = dynamic_cast<const CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+  if (fused_act_node && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
+    return ActivationQType::PreDefinedValue;
+
+  switch (node->opcode())
+  {
+    case CircleOpcode::LOGISTIC:
+    case CircleOpcode::TANH:
+    case CircleOpcode::SOFTMAX:
+      return ActivationQType::PreDefinedValue;
+    case CircleOpcode::FLOOR:
+    case CircleOpcode::FLOOR_DIV:
+    case CircleOpcode::FLOOR_MOD:
+    case CircleOpcode::CEIL:
+      return ActivationQType::IntScale;
+    default:
+      break;
+  }
+
+  return ActivationQType::MinMax;
+}
+
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype)
+{
+  auto qparam = std::make_unique<CircleQuantParam>();
+
+  auto set_qparam = [&qparam](float scale, int64_t zp) {
+    qparam->scale.emplace_back(scale);
+    qparam->zerop.emplace_back(zp);
+  };
+
+  switch (opcode)
+  {
+    case CircleOpcode::LOGISTIC:
+      if (dtype == loco::DataType::U8)
+        set_qparam(1.0f / 256.0f, 0);
+      else
+      {
+        assert(dtype == loco::DataType::S16);
+        set_qparam(1.0f / 32768.0f, 0);
+      }
+      break;
+    case CircleOpcode::TANH:
+      if (dtype == loco::DataType::U8)
+        set_qparam(2.0f / 256.0f, 128);
+      else
+      {
+        assert(dtype == loco::DataType::S16);
+        set_qparam(1.0f / 32768.0f, 0);
+      }
+      break;
+    case CircleOpcode::SOFTMAX:
+      if (dtype == loco::DataType::U8)
+        set_qparam(1.0f / 255.0f, 0);
+      else
+      {
+        assert(dtype == loco::DataType::S16);
+        set_qparam(1.0f / 32767.0f, 0);
+      }
+      break;
+    default:
+      throw std::runtime_error("Unsupported opcode with pre-defined qparam");
+  }
+  return std::move(qparam);
+}
+
+// For nodes with integer output, we use integer scale
+void set_int_scale(luci::CircleNode *node)
+{
+  assert(node); // FIX_CALLER_UNLESS
+
+  auto qparam = node->quantparam();
+  assert(qparam);                    // FIX_CALLER_UNLESS
+  assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS
+
+  auto fp_scale = qparam->scale[0];
+  qparam->scale[0] = fp_scale < 1 ? 1.0f : std::round(fp_scale);
+}
+
+void quant_const(luci::CircleConst *node, loco::DataType quant_type)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  float min = std::numeric_limits<float>::max();
+  float max = std::numeric_limits<float>::lowest();
+  for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
+  {
+    auto data = node->at<loco::DataType::FLOAT32>(i);
+    min = data < min ? data : min;
+    max = data > max ? data : max;
+  }
+
+  float scaling_factor{0.0};
+  int64_t zp{0};
+  float nudged_min{0.0};
+  float nudged_max{0.0};
+
+  switch (quant_type)
+  {
+    case loco::DataType::U8:
+      asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
+                                              nudged_max);
+      break;
+    case loco::DataType::S16:
+      symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
+                                             nudged_max);
+      break;
+    default:
+      throw std::runtime_error("Unsupported data type");
+  }
+
+  auto quantparam = std::make_unique<luci::CircleQuantParam>();
+  quantparam->scale.push_back(scaling_factor);
+  quantparam->zerop.push_back(zp);
+  node->quantparam(std::move(quantparam));
+}
+
 } // namespace luci
index 605f6a77e08a84d8122ae5d1ff9939b96a5678bf..cd8cec95a740c31e591e792eee8a7b3bb1ac89d0 100644 (file)
 namespace luci
 {
 
+// Compute scale/zp using given min/max for symmetric quantization (int16)
 void compute_sym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
                           float &nudged_min, float &nudged_max);
 
+// Compute scale/zp using given min/max for asymmetric quantization (uint8)
 void compute_asym_scale_zp(float min, float max, float &scaling_factor, int64_t &zp,
                            float &nudged_min, float &nudged_max);
 
+// Asymmetric per-layer quantization of weights (const tensor) using given min/max values
+// NOTE: in-place update of node data
 void asymmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                              float &scaling_factor, int64_t &zp, float &nudged_min,
                                              float &nudged_max);
 
+// Symmetric per-layer quantization of weights (const tensor) using given min/max values
+// NOTE: in-place update of node data
 void symmetric_wquant_with_minmax_per_layer(CircleConst *node, float min, float max,
                                             float &scaling_factor, int64_t &zp, float &nudged_min,
                                             float &nudged_max);
 
+// Helper function to get channel dimension
+// TODO Embed this function into iterate_per_channel
 bool get_channel_dim_index(CircleConst *node, loco::TensorShape &dimension,
                            int32_t &channel_dim_index);
 
+// Calculate offset of the given indices in dimension
 uint32_t cal_offset(loco::TensorShape &dimension, uint32_t *indices);
 
-void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type);
+// Backward propagation of concatenation qparam
+void propagate_concat_quantparam(luci::CircleConcatenation *concat);
 
-void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant_type);
-
-bool is_weights(CircleNode *node);
+// Backward propagation of pad_v2 qparam
+void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2);
 
+// Return true if the node is quantized
 bool is_quantized(const CircleNode *node);
 
+enum ActivationQType
+{
+  MinMax,          // Quantize using recorded min/max
+  PreDefinedValue, // Quantize using pre-defined values
+  IntScale,        // Round scale to a positive integer
+};
+
+ActivationQType activation_qtype(const CircleNode *node);
+
+// Create qparam with pre-defined values for speical operators
+std::unique_ptr<CircleQuantParam> make_predefined_qparam(CircleOpcode opcode, loco::DataType dtype);
+
+// Update node's scale to a positive integer (for special Ops e.g., Floor, Ceil)
+void set_int_scale(luci::CircleNode *node);
+
+// Quantize const tensor using its min/max values
+void quant_const(luci::CircleConst *node, loco::DataType quant_type);
+
 } // namespace luci
 
 #endif // __LUCI_QUANTIZATION_UTILS_H__
diff --git a/compiler/luci/pass/src/QuantizeActivation.cpp b/compiler/luci/pass/src/QuantizeActivation.cpp
new file mode 100644 (file)
index 0000000..1493318
--- /dev/null
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeActivation.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <algorithm>
+#include <cmath>
+
+using namespace luci;
+
+namespace
+{
+
+bool has_min_max(const CircleNode *node)
+{
+  return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
+}
+
+} // namespace
+
+// QuantizeActivation
+namespace luci
+{
+
+void QuantizeActivation::visit(luci::CircleNode *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
+
+  // Check if this is already quantized
+  if (is_quantized(node))
+    return;
+
+  // Check if this is bool type (bool type is not quantized)
+  if (node->dtype() == loco::DataType::BOOL)
+    return;
+
+  // Check if this is const (const activation is handled by QuantizeConstInputActivation)
+  // NOTE QuantizePreChecker guarantees weights/bias are const.
+  // Update this code when we accept non-const weights/bias.
+  if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
+    return;
+
+  // Check if this is activation
+  // We assume min/max are recorded only for activations
+  if (has_min_max(node))
+  {
+    // Quantize using recorded min/max
+    auto quantparam = node->quantparam();
+    assert(quantparam);
+    assert(quantparam->min.size() == 1); // only support layer-wise quant
+    assert(quantparam->max.size() == 1); // only support layer-wise quant
+    auto min = quantparam->min[0];
+    auto max = quantparam->max[0];
+
+    float scaling_factor{0};
+    int64_t zp{0};
+    float nudged_min{0};
+    float nudged_max{0};
+
+    if (output_type == loco::DataType::U8)
+    {
+      compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+      node->dtype(loco::DataType::U8);
+    }
+    else
+    {
+      compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
+      node->dtype(loco::DataType::S16);
+    }
+
+    node->quantparam()->scale.push_back(scaling_factor);
+    node->quantparam()->zerop.push_back(zp);
+  }
+  // Fix special attributes
+  if (node->opcode() == luci::CircleOpcode::CAST)
+  {
+    auto *cast = loco::must_cast<luci::CircleCast *>(node);
+    auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x());
+
+    // make sure that cast_input is already quantized
+    assert(cast_input->dtype() != loco::DataType::FLOAT32);
+    cast->in_data_type(cast_input->dtype());
+    cast->out_data_type(cast->dtype());
+  }
+}
+
+} // namespace luci
+
+// QuantizeSpecialActivation
+namespace luci
+{
+
+void QuantizeSpecialActivation::visit(luci::CircleNode *node)
+{
+  // Nodes fused with activation functions which need special quantization
+  auto fused_act_node = dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(node);
+  if (fused_act_node != nullptr && fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
+  {
+    auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+    node->quantparam(std::move(qparam));
+  }
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleLogistic *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
+  auto qparam = make_predefined_qparam(luci::CircleOpcode::LOGISTIC, output_type);
+  node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleTanh *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
+  auto qparam = make_predefined_qparam(luci::CircleOpcode::TANH, output_type);
+  node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleSoftmax *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::PreDefinedValue);
+  auto qparam = make_predefined_qparam(luci::CircleOpcode::SOFTMAX, output_type);
+  node->quantparam(std::move(qparam));
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloor *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+  set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloorDiv *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+  set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleFloorMod *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+  set_int_scale(node);
+}
+
+void QuantizeSpecialActivation::visit(luci::CircleCeil *node)
+{
+  assert(activation_qtype(node) == luci::ActivationQType::IntScale);
+  set_int_scale(node);
+}
+
+} // namespace luci
+
+// QuantizeConstInputActivation
+namespace luci
+{
+
+// Default behavior (NYI)
+void QuantizeConstInputActivation::visit(luci::CircleNode *node)
+{
+  for (uint32_t i = 0; i < node->arity(); i++)
+  {
+    auto input_node = node->arg(i);
+    auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
+    if (const_node != nullptr)
+      throw std::runtime_error("Unsupported Op for const inputs");
+  }
+}
+
+// INPUT_NAME is the only activation of NODE
+#define QUANTIZE_SINGLE_CONST_INPUT(NODE, INPUT_NAME)           \
+  void QuantizeConstInputActivation::visit(NODE *node)          \
+  {                                                             \
+    auto input = node->INPUT_NAME();                            \
+    auto const_node = dynamic_cast<luci::CircleConst *>(input); \
+    if (const_node && !is_quantized(const_node))                \
+    {                                                           \
+      auto new_const = luci::clone(const_node);                 \
+      quant_const(new_const, _output_type);                     \
+      node->INPUT_NAME(new_const);                              \
+    }                                                           \
+  }
+
+// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
+#define QUANTIZE_TWO_CONST_INPUTS(NODE, INPUT_NAME1, INPUT_NAME2) \
+  void QuantizeConstInputActivation::visit(NODE *node)            \
+  {                                                               \
+    auto input1 = node->INPUT_NAME1();                            \
+    auto const_node1 = dynamic_cast<luci::CircleConst *>(input1); \
+    if (const_node1 && !is_quantized(const_node1))                \
+    {                                                             \
+      auto new_const1 = luci::clone(const_node1);                 \
+      quant_const(new_const1, _output_type);                      \
+      node->INPUT_NAME1(new_const1);                              \
+    }                                                             \
+    auto input2 = node->INPUT_NAME2();                            \
+    auto const_node2 = dynamic_cast<luci::CircleConst *>(input2); \
+    if (const_node2 && !is_quantized(const_node2))                \
+    {                                                             \
+      auto new_const2 = luci::clone(const_node2);                 \
+      quant_const(new_const2, _output_type);                      \
+      node->INPUT_NAME2(new_const2);                              \
+    }                                                             \
+  }
+
+// Ops that receive a single activation as an input
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMax, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleArgMin, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleBatchToSpaceND, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleDepthToSpace, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleElu, features)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleExp, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleFloor, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleGather, params)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLocalResponseNormalization, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleLogistic, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMean, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleMirrorPad, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CirclePad, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceAny, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceProd, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMax, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReduceMin, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReshape, tensor)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeBilinear, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleResizeNearestNeighbor, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleReverseSequence, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleRsqrt, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSlice, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSoftmax, logits)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToBatchND, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSpaceToDepth, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplit, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSplitV, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSqrt, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleStridedSlice, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleSum, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTanh, x)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTile, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTopKV2, input)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleTranspose, a)
+QUANTIZE_SINGLE_CONST_INPUT(luci::CircleUnpack, value)
+
+// Ops that receive two activations as inputs
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleAdd, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleBatchMatMul, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleDiv, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleFloorDiv, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreater, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleGreaterEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleLess, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleLessEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMaximum, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMinimum, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleMul, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleNotEqual, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CirclePow, x, y)
+QUANTIZE_TWO_CONST_INPUTS(luci::CircleSub, x, y)
+
+// AddN has arbitrary number of inputs
+void QuantizeConstInputActivation::visit(luci::CircleAddN *node)
+{
+  auto arity = node->arity();
+  for (uint32_t i = 0; i < arity; i++)
+  {
+    auto input_node = node->inputs(i);
+    auto const_node = dynamic_cast<luci::CircleConst *>(input_node);
+    if (const_node && !is_quantized(const_node))
+    {
+      auto new_const = luci::clone(const_node);
+      quant_const(new_const, _output_type);
+      node->inputs(i, new_const);
+    }
+  }
+}
+
+#undef QUANTIZE_SINGLE_CONST_INPUT
+#undef QUANTIZE_TWO_CONST_INPUTS
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeActivation.h b/compiler/luci/pass/src/QuantizeActivation.h
new file mode 100644 (file)
index 0000000..fc32d1c
--- /dev/null
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZATION_ACTIVATION_H__
+#define __LUCI_QUANTIZATION_ACTIVATION_H__
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief Quantize non-const activation using recorded min/max values
+ */
+struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+  QuantizeActivation(loco::DataType input, loco::DataType output)
+    : input_type(input), output_type(output)
+  {
+  }
+
+  loco::DataType input_type;
+  loco::DataType output_type;
+
+  // Quantize each node using recorded min/max
+  void visit(luci::CircleNode *node);
+};
+
+/**
+ * @brief Quantize non-const activaion using pre-defined scale/zp for special Ops
+ */
+struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+  QuantizeSpecialActivation(loco::DataType input, loco::DataType output)
+    : input_type(input), output_type(output)
+  {
+  }
+
+  loco::DataType input_type;
+  loco::DataType output_type;
+
+  void visit(luci::CircleNode *node);
+  void visit(luci::CircleLogistic *node);
+  void visit(luci::CircleTanh *node);
+  void visit(luci::CircleSoftmax *node);
+  void visit(luci::CircleFloor *node);
+  void visit(luci::CircleFloorDiv *node);
+  void visit(luci::CircleFloorMod *node);
+  void visit(luci::CircleCeil *node);
+};
+
+// Quantize constant input activation of a node
+// The input of a node is quantized if it is
+// 1. Constant (instance of CircleConst*)
+// 2. Activation (other inputs e.g., weights, bias, axis, etc should not be quantized here)
+struct QuantizeConstInputActivation final : public luci::CircleNodeMutableVisitor<void>
+{
+  QuantizeConstInputActivation(loco::DataType output_type) : _output_type(output_type) {}
+
+private:
+  loco::DataType _output_type;
+
+// Skip NODE
+#define SKIP(NODE) \
+  void visit(NODE *) {}
+
+  // Handled in QuantizeWeights and QuantizeBias
+  SKIP(luci::CircleConv2D)
+  SKIP(luci::CircleDepthwiseConv2D)
+  SKIP(luci::CircleFullyConnected)
+  SKIP(luci::CircleInstanceNorm)
+  SKIP(luci::CirclePRelu)
+  SKIP(luci::CircleTransposeConv)
+
+  // Handled in PropagateQParamBackwardPass
+  SKIP(luci::CircleConcatenation)
+  SKIP(luci::CirclePadV2)
+  SKIP(luci::CirclePack)
+  SKIP(luci::CircleOneHot)
+
+  // Inputs of logical Ops are bool, thus not quantized
+  SKIP(luci::CircleLogicalOr)
+  SKIP(luci::CircleLogicalAnd)
+  SKIP(luci::CircleLogicalNot)
+
+#undef SKIP
+
+  // Default behavior (NYI)
+  void visit(luci::CircleNode *node);
+
+  // Ops that receive a single activation as an input
+  void visit(luci::CircleArgMax *node);
+  void visit(luci::CircleArgMin *node);
+  void visit(luci::CircleBatchToSpaceND *node);
+  void visit(luci::CircleDepthToSpace *node);
+  void visit(luci::CircleElu *node);
+  void visit(luci::CircleExp *node);
+  void visit(luci::CircleFloor *node);
+  void visit(luci::CircleGather *node);
+  void visit(luci::CircleLocalResponseNormalization *node);
+  void visit(luci::CircleLogistic *node);
+  void visit(luci::CircleMean *node);
+  void visit(luci::CircleMirrorPad *node);
+  void visit(luci::CirclePad *node);
+  void visit(luci::CircleReduceAny *node);
+  void visit(luci::CircleReduceProd *node);
+  void visit(luci::CircleReduceMax *node);
+  void visit(luci::CircleReduceMin *node);
+  void visit(luci::CircleReshape *node);
+  void visit(luci::CircleResizeBilinear *node);
+  void visit(luci::CircleResizeNearestNeighbor *node);
+  void visit(luci::CircleReverseSequence *node);
+  void visit(luci::CircleRsqrt *node);
+  void visit(luci::CircleSlice *node);
+  void visit(luci::CircleSoftmax *node);
+  void visit(luci::CircleSpaceToBatchND *node);
+  void visit(luci::CircleSpaceToDepth *node);
+  void visit(luci::CircleSplit *node);
+  void visit(luci::CircleSplitV *node);
+  void visit(luci::CircleSqrt *node);
+  void visit(luci::CircleStridedSlice *node);
+  void visit(luci::CircleSum *node);
+  void visit(luci::CircleTanh *node);
+  void visit(luci::CircleTile *node);
+  void visit(luci::CircleTopKV2 *node);
+  void visit(luci::CircleTranspose *node);
+  void visit(luci::CircleUnpack *node);
+
+  // Ops that receive two activations as inputs
+  void visit(luci::CircleAdd *node);
+  void visit(luci::CircleBatchMatMul *node);
+  void visit(luci::CircleDiv *node);
+  void visit(luci::CircleEqual *node);
+  void visit(luci::CircleFloorDiv *node);
+  void visit(luci::CircleGreater *node);
+  void visit(luci::CircleGreaterEqual *node);
+  void visit(luci::CircleLess *node);
+  void visit(luci::CircleLessEqual *node);
+  void visit(luci::CircleMaximum *node);
+  void visit(luci::CircleMinimum *node);
+  void visit(luci::CircleMul *node);
+  void visit(luci::CircleNotEqual *node);
+  void visit(luci::CirclePow *node);
+  void visit(luci::CircleSub *node);
+
+  // AddN has arbitrary number of inputs
+  void visit(luci::CircleAddN *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZATION_ACTIVATION_H__
diff --git a/compiler/luci/pass/src/QuantizeBias.cpp b/compiler/luci/pass/src/QuantizeBias.cpp
new file mode 100644 (file)
index 0000000..aa49623
--- /dev/null
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeBias.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <algorithm>
+#include <cmath>
+
+using namespace luci;
+
+namespace
+{
+
+// struct to carry Input/Weights/Bias
+struct IWB
+{
+  CircleNode *input = nullptr;
+  CircleNode *weights = nullptr;
+  CircleConst *bias = nullptr;
+
+  IWB(loco::Node *i, loco::Node *w, loco::Node *b)
+  {
+    input = dynamic_cast<luci::CircleNode *>(i);
+    weights = dynamic_cast<luci::CircleNode *>(w);
+    bias = dynamic_cast<luci::CircleConst *>(b);
+  }
+
+  // Return true if bias can be quantized with valid input an weights
+  operator bool()
+  {
+    if (bias == nullptr || is_quantized(bias))
+      return false;
+    if (input == nullptr || weights == nullptr)
+      return false;
+    return true;
+  }
+};
+
+// Create a new const node from an existing node.
+// The new node has the following characteristics
+// type: T
+// shape: same with 'node' (given as an argument)
+// buffer size: 'size' (given as an argument)
+// Note that contents are not filled in this function.
+template <loco::DataType T>
+luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
+{
+  auto new_node = node->graph()->nodes()->create<CircleConst>();
+  // TODO: We don't have any naming convention for quantized nodes yet.
+  //       Fix this when we have one.
+  new_node->name(node->name());
+  new_node->dtype(T);
+  new_node->rank(node->rank());
+  for (uint32_t i = 0; i < node->rank(); i++)
+    new_node->dim(i).set(node->dim(i).value());
+
+  new_node->size<T>(size);
+  new_node->shape_status(luci::ShapeStatus::VALID);
+
+  return new_node;
+}
+
+CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
+                                       float *scaling_factor, int64_t *zp)
+{
+  float scale = input_scale * weight_scale;
+  const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    quantized_values[i] =
+      static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+  }
+
+  auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
+  const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+  const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    new_bias->at<loco::DataType::S32>(i) =
+      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+  *scaling_factor = scale;
+  *zp = 0;
+
+  return new_bias;
+}
+
+CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
+                                    std::vector<float> &weight_scale,
+                                    std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
+{
+  float scaling_factor_inv{0};
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    scaling_factor[i] = input_scale * weight_scale[i];
+    scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+    quantized_values[i] =
+      static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+    zp[i] = 0;
+  }
+
+  auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
+
+  const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
+  const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    new_bias->at<loco::DataType::S32>(i) =
+      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+
+  return new_bias;
+}
+
+CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
+                                          std::vector<float> &weight_scale,
+                                          std::vector<float> &scaling_factor,
+                                          std::vector<int64_t> &zp)
+{
+  float scaling_factor_inv{0};
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int64_t> quantized_values(size);
+
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    scaling_factor[i] = input_scale * weight_scale[i];
+    scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
+    quantized_values[i] =
+      static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
+    zp[i] = 0;
+  }
+
+  auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
+
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
+  }
+
+  return new_bias;
+}
+
+} // namespace
+
+namespace luci
+{
+
+// Return a quantized bias node
+// - ChannelWise: bias scale[i] = input_scale * weight_scale[i]
+//   (U8 output -> S32 bias, S16 output -> S64 bias; anything else throws).
+// - Otherwise (layer-wise): a single scale = input_scale * weight_scale.
+// Side effect: a const `input` is quantized here (not in QuantizeActivation).
+CircleConst *QuantizeBias::quantized_bias(CircleNode *input, const CircleNode *weight,
+                                          CircleNode *bias)
+{
+  auto const_bias = loco::must_cast<luci::CircleConst *>(bias);
+  assert(const_bias->dtype() == loco::DataType::FLOAT32);
+
+  // If input is const, it is quantized here, not in QuantizeActivation
+  if (auto const_input = dynamic_cast<luci::CircleConst *>(input))
+  {
+    quant_const(const_input, output_type);
+  }
+
+  CircleConst *new_bias = nullptr;
+
+  if (granularity == QuantizationGranularity::ChannelWise)
+  {
+    auto input_q = input->quantparam();
+    assert(input_q);
+    assert(input_q->scale.size() == 1); // input scale's layer-wise
+    auto input_scale = input_q->scale[0];
+
+    assert(weight->quantparam() != nullptr); // weight scale's channel-wise
+    auto weight_scale = weight->quantparam()->scale;
+
+    // One scale/zero-point pair per bias element (= per output channel)
+    uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
+    assert(size == weight_scale.size());
+    std::vector<float> scaling_factor(size);
+    std::vector<int64_t> zp(size);
+
+    if (output_type == loco::DataType::U8)
+    {
+      new_bias = quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+    }
+    else if (output_type == loco::DataType::S16)
+    {
+      new_bias =
+        int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
+    }
+    else
+    {
+      throw std::runtime_error("Unsupported quantization type.");
+    }
+
+    // Attach the per-channel scale/zerop computed above to the new bias
+    auto quantparam = std::make_unique<CircleQuantParam>();
+    quantparam->scale = scaling_factor;
+    quantparam->zerop = zp;
+    assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+    new_bias->quantparam(std::move(quantparam));
+
+    return new_bias;
+  }
+  else
+  {
+    auto input_q = input->quantparam();
+    assert(input_q);
+    assert(input_q->scale.size() == 1); // Only support per-layer quant
+    auto input_scale = input_q->scale[0];
+
+    auto weight_q = weight->quantparam();
+    assert(weight_q);
+    assert(weight_q->scale.size() == 1); // Only support per-layer quant
+    auto weight_scale = weight_q->scale[0];
+
+    // Single scale/zero-point for the whole bias tensor
+    float scaling_factor{0};
+    int64_t zp{0};
+    new_bias =
+      asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
+    auto quantparam = std::make_unique<CircleQuantParam>();
+    quantparam->scale.push_back(scaling_factor);
+    quantparam->zerop.push_back(zp);
+    assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
+    new_bias->quantparam(std::move(quantparam));
+
+    return new_bias;
+  }
+}
+
+// Quantize the bias of CONV_2D using the input/filter scales
+void QuantizeBias::visit(luci::CircleConv2D *node)
+{
+  LOGGER(l);
+  // Log message had a duplicated "QuantizeBias QuantizeBias::visit" prefix;
+  // aligned with the message used by the CircleFullyConnected visitor.
+  INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl;
+
+  // Proceed only when (input, weights, bias) form a valid triple per IWB()
+  if (auto iwb = IWB(node->input(), node->filter(), node->bias()))
+  {
+    auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+    node->bias(new_bias);
+  }
+}
+
+// Quantize the bias of DEPTHWISE_CONV_2D using the input/filter scales
+void QuantizeBias::visit(luci::CircleDepthwiseConv2D *node)
+{
+  LOGGER(l);
+  // Log message had a duplicated "QuantizeBias QuantizeBias::visit" prefix;
+  // aligned with the message used by the CircleFullyConnected visitor.
+  INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl;
+
+  // Proceed only when (input, weights, bias) form a valid triple per IWB()
+  if (auto iwb = IWB(node->input(), node->filter(), node->bias()))
+  {
+    auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+    node->bias(new_bias);
+  }
+}
+
+// Quantize the bias of TRANSPOSE_CONV; its data input is outBackprop()
+void QuantizeBias::visit(luci::CircleTransposeConv *node)
+{
+  LOGGER(l);
+  // Log message had a duplicated "QuantizeBias QuantizeBias::visit" prefix;
+  // aligned with the message used by the CircleFullyConnected visitor.
+  INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl;
+
+  // Proceed only when (input, weights, bias) form a valid triple per IWB()
+  if (auto iwb = IWB(node->outBackprop(), node->filter(), node->bias()))
+  {
+    auto new_bias = quantized_bias(iwb.input, iwb.weights, iwb.bias);
+    node->bias(new_bias);
+  }
+}
+
+// Quantize the bias of FULLY_CONNECTED using the input/weights scales
+void QuantizeBias::visit(luci::CircleFullyConnected *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeBias visit node: " << node->name() << std::endl;
+
+  // Skip nodes whose (input, weights, bias) triple is not valid per IWB()
+  auto iwb = IWB(node->input(), node->weights(), node->bias());
+  if (not iwb)
+    return;
+
+  node->bias(quantized_bias(iwb.input, iwb.weights, iwb.bias));
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeBias.h b/compiler/luci/pass/src/QuantizeBias.h
new file mode 100644 (file)
index 0000000..8de09df
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_BIAS_H__
+#define __LUCI_QUANTIZE_BIAS_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeBias quantizes tensors for bias
+ * @details Use input/weights scale to quantize values
+ *          (scale of bias = input scale * weights scale; zero-point = 0)
+ */
+struct QuantizeBias final : public luci::CircleNodeMutableVisitor<void>
+{
+  QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+    : input_type(input), output_type(output), granularity(gr)
+  {
+  }
+
+  loco::DataType input_type;  // dtype of the input model
+  loco::DataType output_type; // target quantized dtype of the output model
+  QuantizationGranularity granularity; // LayerWise or ChannelWise
+
+private:
+  // Return a quantized bias node
+  CircleConst *quantized_bias(CircleNode *input, const CircleNode *weight, CircleNode *bias);
+
+  // Ops with a bias input: each replaces its bias with a quantized const
+  void visit(luci::CircleConv2D *node);
+  void visit(luci::CircleDepthwiseConv2D *node);
+  void visit(luci::CircleTransposeConv *node);
+  void visit(luci::CircleFullyConnected *node);
+
+  // Default behavior (other ops have no bias to quantize)
+  void visit(luci::CircleNode *) {}
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_BIAS_H__
index c8ad87e3db4783f43de959920486065a63e227f9..c9b35e0be4e89afeca4988ef3632964dcc266199 100644 (file)
 
 #include "luci/Pass/QuantizeDequantizeWeightsPass.h"
 #include "QuantizationUtils.h"
+#include "helpers/LayerInfoMap.h"
 
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Service/Nodes/CircleConst.h>
 #include <luci/Log.h>
 #include <loco/IR/TensorShape.h>
 
@@ -251,7 +253,7 @@ void asymmetric_wdequant_with_minmax_per_layer(CircleConst *node, float scaling_
  * @brief QuantizeDequantizeWeights quantizes and dequantizes tensors for weights
  * @details Find min/max values on the fly, quantize the model, and dequantize the model
  */
-struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
+struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<void>
 {
   QuantizeDequantizeWeights(loco::DataType input, loco::DataType output,
                             QuantizationGranularity granularity)
@@ -263,88 +265,164 @@ struct QuantizeDequantizeWeights final : public luci::CircleNodeMutableVisitor<b
   loco::DataType output_type;
   QuantizationGranularity granularity;
 
-  // Quantize and dequantize input tensors of each node
-  bool visit(luci::CircleNode *node)
+private:
+  // Fake quantize weights (Only u8 quantization is supported for LWQ)
+  // Quantizes then dequantizes `weights` in place and attaches a layer-wise
+  // quantparam (single min/max/scale/zerop) to the node.
+  void fake_quantize_lwq(luci::CircleConst *weights) const
   {
-    assert(output_type == loco::DataType::U8 || output_type == loco::DataType::S16);
-    LOGGER(l);
-    INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
-    auto arity = node->arity();
-    for (uint32_t i = 0; i < arity; i++)
+    assert(output_type == loco::DataType::U8); // FIX_CALLER_UNLESS
+
+    // Find min/max per layer
+    float min = std::numeric_limits<float>::max();
+    float max = std::numeric_limits<float>::lowest();
+    for (uint32_t i = 0; i < weights->size<loco::DataType::FLOAT32>(); i++)
     {
-      auto input_node = node->arg(i);
-      auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
+      auto data = weights->at<loco::DataType::FLOAT32>(i);
+      min = data < min ? data : min;
+      max = data > max ? data : max;
+    }
+    float scaling_factor{0};
+    int64_t zp{0};
+    float nudged_min{0};
+    float nudged_max{0};
+
+    // Quantize then dequantize ("fake" quantization); nudged min/max,
+    // scale and zero-point are produced by the quantizer.
+    asymmetric_wquant_with_minmax_per_layer(weights, min, max, scaling_factor, zp, nudged_min,
+                                            nudged_max);
+    asymmetric_wdequant_with_minmax_per_layer(weights, scaling_factor, nudged_min);
+    auto quantparam = std::make_unique<CircleQuantParam>();
+    quantparam->min.push_back(nudged_min);
+    quantparam->max.push_back(nudged_max);
+    quantparam->scale.push_back(scaling_factor);
+    quantparam->zerop.push_back(zp);
+    weights->quantparam(std::move(quantparam));
+  }
 
-      // Check if this is already quantized
-      if (is_quantized(circle_node))
-        continue;
+private:
+  // Fake quantize weights (u8/s16 quantization are supported for CWQ)
+  // Quantizes then dequantizes `weights` in place and attaches a channel-wise
+  // quantparam (one min/max/scale/zerop per channel) to the node.
+  void fake_quantize_cwq(luci::CircleConst *weights) const
+  {
+    assert(output_type == loco::DataType::U8 ||
+           output_type == loco::DataType::S16); // FIX_CALLER_UNLESS
 
-      if (is_weights(circle_node))
-      {
-        auto circle_const = loco::must_cast<luci::CircleConst *>(circle_node);
+    // Find min/max per channel
+    std::vector<float> min;
+    std::vector<float> max;
 
-        // Find min/max per channel-wise
-        if (granularity == QuantizationGranularity::ChannelWise)
-        {
-          std::vector<float> min;
-          std::vector<float> max;
-
-          cal_minmax_per_channel(circle_const, min, max);
-
-          std::vector<float> nudged_min(min.size());
-          std::vector<float> nudged_max(min.size());
-          std::vector<float> scaling_factor(min.size());
-          std::vector<int64_t> zp(min.size());
-
-          if (output_type == loco::DataType::U8)
-          {
-            asymmetric_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
-                                          nudged_max);
-            asymmetric_wdequant_per_channel(circle_const, scaling_factor, nudged_min);
-          }
-          else
-          {
-            sym_wquant_per_channel(circle_const, min, max, scaling_factor, zp, nudged_min,
-                                   nudged_max);
-            sym_wdequant_per_channel(circle_const, scaling_factor);
-          }
-
-          auto quantparam = std::make_unique<CircleQuantParam>();
-          quantparam->min = nudged_min;
-          quantparam->max = nudged_max;
-          quantparam->scale = scaling_factor;
-          quantparam->zerop = zp;
-          circle_node->quantparam(std::move(quantparam));
-        }
-        // Find min/max per layer-wise
-        else
-        {
-          float min = std::numeric_limits<float>::max();
-          float max = std::numeric_limits<float>::lowest();
-          for (uint32_t i = 0; i < circle_const->size<loco::DataType::FLOAT32>(); i++)
-          {
-            auto data = circle_const->at<loco::DataType::FLOAT32>(i);
-            min = data < min ? data : min;
-            max = data > max ? data : max;
-          }
-          float scaling_factor{0};
-          int64_t zp{0};
-          float nudged_min{0};
-          float nudged_max{0};
-
-          asymmetric_wquant_with_minmax_per_layer(circle_const, min, max, scaling_factor, zp,
-                                                  nudged_min, nudged_max);
-          asymmetric_wdequant_with_minmax_per_layer(circle_const, scaling_factor, nudged_min);
-          auto quantparam = std::make_unique<CircleQuantParam>();
-          quantparam->min.push_back(nudged_min);
-          quantparam->max.push_back(nudged_max);
-          quantparam->scale.push_back(scaling_factor);
-          quantparam->zerop.push_back(zp);
-          circle_node->quantparam(std::move(quantparam));
-        }
-      }
+    cal_minmax_per_channel(weights, min, max);
+
+    std::vector<float> nudged_min(min.size());
+    std::vector<float> nudged_max(min.size());
+    std::vector<float> scaling_factor(min.size());
+    std::vector<int64_t> zp(min.size());
+
+    // U8 uses asymmetric quantization; S16 uses symmetric quantization
+    if (output_type == loco::DataType::U8)
+    {
+      asymmetric_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max);
+      asymmetric_wdequant_per_channel(weights, scaling_factor, nudged_min);
+    }
+    else
+    {
+      sym_wquant_per_channel(weights, min, max, scaling_factor, zp, nudged_min, nudged_max);
+      sym_wdequant_per_channel(weights, scaling_factor);
     }
-    return false;
+
+    // Attach per-channel quantparam to the weights node
+    auto quantparam = std::make_unique<CircleQuantParam>();
+    quantparam->min = nudged_min;
+    quantparam->max = nudged_max;
+    quantparam->scale = scaling_factor;
+    quantparam->zerop = zp;
+    weights->quantparam(std::move(quantparam));
+  }
+
+private:
+  // Dispatch fake quantization by the configured granularity.
+  // Throws std::invalid_argument for an unknown granularity value.
+  void fake_quantize(luci::CircleConst *weights) const
+  {
+    switch (granularity)
+    {
+      case luci::QuantizationGranularity::ChannelWise:
+        fake_quantize_cwq(weights);
+        break;
+      case luci::QuantizationGranularity::LayerWise:
+        fake_quantize_lwq(weights);
+        break;
+      default:
+        throw std::invalid_argument("Unsupported granularity");
+    }
+  }
+
+private:
+  // Check if
+  // 1. node is const
+  // 2. node was not quantized
+  // @return true when `node` is a CircleConst that has not been quantized yet
+  bool is_quantizable(loco::Node *node)
+  {
+    auto const_node = dynamic_cast<luci::CircleConst *>(node);
+    if (not const_node)
+      return false;
+
+    // Skip if this is already quantized
+    if (is_quantized(const_node))
+      return false;
+
+    return true;
+  }
+
+  // Default behavior (Do nothing): ops without fake-quantizable weights
+  void visit(luci::CircleNode *) {}
+
+  void visit(luci::CircleConv2D *node)
+  {
+    LOGGER(l);
+    INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+    // Replace the filter with a fake-quantized clone
+    if (is_quantizable(node->filter()))
+    {
+      auto original = loco::must_cast<luci::CircleConst *>(node->filter());
+      auto cloned = luci::clone(original);
+      node->filter(cloned);
+      fake_quantize(cloned);
+    }
+  }
+
+  void visit(luci::CircleDepthwiseConv2D *node)
+  {
+    LOGGER(l);
+    INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+    // Replace the filter with a fake-quantized clone
+    if (is_quantizable(node->filter()))
+    {
+      auto original = loco::must_cast<luci::CircleConst *>(node->filter());
+      auto cloned = luci::clone(original);
+      node->filter(cloned);
+      fake_quantize(cloned);
+    }
+  }
+
+  void visit(luci::CircleTransposeConv *node)
+  {
+    LOGGER(l);
+    INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+    // Replace the filter with a fake-quantized clone
+    if (is_quantizable(node->filter()))
+    {
+      auto original = loco::must_cast<luci::CircleConst *>(node->filter());
+      auto cloned = luci::clone(original);
+      node->filter(cloned);
+      fake_quantize(cloned);
+    }
+  }
+
+  void visit(luci::CircleFullyConnected *node)
+  {
+    LOGGER(l);
+    INFO(l) << "QuantizeDequantizeWeights visit node: " << node->name() << std::endl;
+
+    // Replace the weights with a fake-quantized clone
+    if (is_quantizable(node->weights()))
+    {
+      auto original = loco::must_cast<luci::CircleConst *>(node->weights());
+      auto cloned = luci::clone(original);
+      node->weights(cloned);
+      fake_quantize(cloned);
+    }
   }
 };
 
@@ -355,11 +433,36 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g)
   LOGGER(l);
   INFO(l) << "QuantizeDequantizeWeightsPass Start" << std::endl;
 
+  auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+  auto quantize_dtype = [&](const luci::CircleNode *node) {
+    auto iter = info_by_name.find(node->name());
+
+    // Return designated quantization dtype
+    if (iter != info_by_name.end())
+      return iter->second.dtype;
+
+    // Return default quantization dtype
+    return _ctx->output_model_dtype;
+  };
+
+  auto quantize_granularity = [&](const luci::CircleNode *node) {
+    auto iter = info_by_name.find(node->name());
+
+    // Return designated quantization granularity
+    if (iter != info_by_name.end())
+      return iter->second.granularity;
+
+    // Return default quantization granularity
+    return _ctx->granularity;
+  };
+
   // Quantize weights
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeDequantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    QuantizeDequantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
+                                 quantize_granularity(circle_node));
     circle_node->accept(&qw);
   }
 
index f226253c29385743beacf6c83893ea8df0d29042..15f5ca7ac69eea5e356a27386d0a2cbccbe337ff 100644 (file)
@@ -25,3 +25,17 @@ TEST(QuantizeDequantizeWeightsPassTest, name)
   auto const name = pass.name();
   ASSERT_NE(nullptr, name);
 }
+
+// The Context-based constructor must also yield a pass with a non-null name
+TEST(QuantizeDequantizeWeightsPassTest, name_ctx)
+{
+  auto ctx = std::make_unique<luci::QuantizeDequantizeWeightsPass::Context>();
+  {
+    ctx->input_model_dtype = loco::DataType::FLOAT32;
+    ctx->output_model_dtype = loco::DataType::U8;
+    ctx->granularity = luci::QuantizationGranularity::LayerWise;
+  }
+
+  luci::QuantizeDequantizeWeightsPass pass(std::move(ctx));
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.cpp
new file mode 100644 (file)
index 0000000..4b3b7e3
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizePreCheckerPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+// Throw std::runtime_error when `node` is a non-const input.
+// CIRCLECONST and CIRCLEOUTPUTEXCLUDE are accepted; nullptr (e.g. a failed
+// dynamic_cast in the caller) is silently ignored.
+void check_const_opcode(luci::CircleNode *node)
+{
+  if (node == nullptr)
+    return;
+
+  if (node->opcode() != luci::CircleOpcode::CIRCLECONST and
+      node->opcode() != luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE)
+  {
+    throw std::runtime_error("Unsupported non const input " + node->name());
+  }
+}
+
+// Visitor that throws (via check_const_opcode) when an input that must be
+// constant is neither CIRCLECONST nor CIRCLEOUTPUTEXCLUDE. Each macro
+// generates a visit() overload checking one/two/three named inputs.
+struct ConstInputChecker final : public luci::CircleNodeMutableVisitor<void>
+{
+// INPUT_NAME is name for input const for current NODE
+#define CHECK_NODE_WITH_ONE_INPUT_CONST(NODE, INPUT_NAME)                    \
+  void visit(NODE *node)                                                     \
+  {                                                                          \
+    const auto input = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME()); \
+    check_const_opcode(input);                                               \
+  }
+
+// INPUT_NAME_1 and INPUT_NAME_2 are names for input const for current NODE
+#define CHECK_NODE_WITH_TWO_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2)        \
+  void visit(NODE *node)                                                         \
+  {                                                                              \
+    const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1()); \
+    const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2()); \
+                                                                                 \
+    check_const_opcode(input_1);                                                 \
+    check_const_opcode(input_2);                                                 \
+  }
+
+// INPUT_NAME_1, INPUT_NAME_2 and INPUT_NAME_3 are names for input const for current NODE
+#define CHECK_NODE_WITH_THREE_INPUT_CONST(NODE, INPUT_NAME_1, INPUT_NAME_2, INPUT_NAME_3) \
+  void visit(NODE *node)                                                                  \
+  {                                                                                       \
+    const auto input_1 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_1());          \
+    const auto input_2 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_2());          \
+    const auto input_3 = dynamic_cast<luci::CircleNode *>(node->INPUT_NAME_3());          \
+                                                                                          \
+    check_const_opcode(input_1);                                                          \
+    check_const_opcode(input_2);                                                          \
+    check_const_opcode(input_3);                                                          \
+  }
+
+  // Skip other circle node
+  void visit(luci::CircleNode *) {}
+
+  // Ops that receive one const nodes as inputs
+  CHECK_NODE_WITH_ONE_INPUT_CONST(luci::CirclePRelu, alpha)
+
+  // Ops that receive two const node as an inputs
+  CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleConv2D, filter, bias)
+  CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleDepthwiseConv2D, filter, bias)
+  CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleFullyConnected, weights, bias)
+  CHECK_NODE_WITH_TWO_INPUT_CONST(luci::CircleInstanceNorm, gamma, beta)
+
+  // Ops that receive three const nodes as an inputs
+  CHECK_NODE_WITH_THREE_INPUT_CONST(luci::CircleTransposeConv, inputSizes, filter, bias)
+
+#undef CHECK_NODE_WITH_ONE_INPUT_CONST
+#undef CHECK_NODE_WITH_TWO_INPUT_CONST
+#undef CHECK_NODE_WITH_THREE_INPUT_CONST
+};
+
+} // namespace
+
+/**
+ * Verify the input model has the form acceptable by quantizer
+ * (throws from ConstInputChecker when a required const input is not const)
+ */
+bool QuantizePreCheckerPass::run(loco::Graph *g)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizePreCheckerPass Start" << std::endl;
+
+  // The checker holds no state, so one instance can visit every node
+  ConstInputChecker checker{};
+
+  // Check const inputs of every active node
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    loco::must_cast<luci::CircleNode *>(node)->accept(&checker);
+  }
+
+  INFO(l) << "QuantizePreCheckerPass End" << std::endl;
+
+  return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp b/compiler/luci/pass/src/QuantizePreCheckerPass.test.cpp
new file mode 100644 (file)
index 0000000..788353c
--- /dev/null
@@ -0,0 +1,401 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/QuantizePreCheckerPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+// Test fixture: CONV_2D graph whose bias is a CircleConst when `make_valid`
+// is true, or a CircleInput (non-const) when false.
+class SimpleConv2DGraph
+{
+public:
+  SimpleConv2DGraph(bool make_valid)
+  {
+    conv2d_node = g.nodes()->create<luci::CircleConv2D>();
+    input_1 = g.nodes()->create<luci::CircleInput>();
+    filter = g.nodes()->create<luci::CircleConst>();
+
+    conv2d_node->input(input_1);
+    conv2d_node->filter(filter);
+
+    // Valid graphs get a const bias; invalid graphs wire a graph input instead
+    if (make_valid)
+    {
+      bias = g.nodes()->create<luci::CircleConst>();
+      conv2d_node->bias(bias);
+    }
+    else
+    {
+      input_2 = g.nodes()->create<luci::CircleInput>();
+      conv2d_node->bias(input_2);
+    }
+
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    output->from(conv2d_node);
+  }
+
+public:
+  loco::Graph g;
+
+private:
+  luci::CircleConv2D *conv2d_node = nullptr;
+  luci::CircleInput *input_1 = nullptr;
+  luci::CircleInput *input_2 = nullptr;
+  luci::CircleConst *filter = nullptr;
+  luci::CircleConst *bias = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// Test fixture: DEPTHWISE_CONV_2D graph whose bias is a CircleConst when
+// `make_valid` is true, or a CircleInput (non-const) when false.
+class SimpleDepthConv2DGraph
+{
+public:
+  SimpleDepthConv2DGraph(bool make_valid)
+  {
+    depth_conv2d_node = g.nodes()->create<luci::CircleDepthwiseConv2D>();
+    input_1 = g.nodes()->create<luci::CircleInput>();
+    filter = g.nodes()->create<luci::CircleConst>();
+
+    depth_conv2d_node->input(input_1);
+    depth_conv2d_node->filter(filter);
+
+    // Valid graphs get a const bias; invalid graphs wire a graph input instead
+    if (make_valid)
+    {
+      bias = g.nodes()->create<luci::CircleConst>();
+      depth_conv2d_node->bias(bias);
+    }
+    else
+    {
+      input_2 = g.nodes()->create<luci::CircleInput>();
+      depth_conv2d_node->bias(input_2);
+    }
+
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    output->from(depth_conv2d_node);
+  }
+
+public:
+  loco::Graph g;
+
+private:
+  luci::CircleDepthwiseConv2D *depth_conv2d_node = nullptr;
+  luci::CircleInput *input_1 = nullptr;
+  luci::CircleInput *input_2 = nullptr;
+  luci::CircleConst *filter = nullptr;
+  luci::CircleConst *bias = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// Test fixture: FULLY_CONNECTED graph whose bias is a CircleConst when
+// `make_valid` is true, or a CircleInput (non-const) when false.
+class SimpleFCGraph
+{
+public:
+  SimpleFCGraph(bool make_valid)
+  {
+    fc_node = g.nodes()->create<luci::CircleFullyConnected>();
+    input_1 = g.nodes()->create<luci::CircleInput>();
+    weights = g.nodes()->create<luci::CircleConst>();
+
+    fc_node->input(input_1);
+    fc_node->weights(weights);
+
+    // Valid graphs get a const bias; invalid graphs wire a graph input instead
+    if (make_valid)
+    {
+      bias = g.nodes()->create<luci::CircleConst>();
+      fc_node->bias(bias);
+    }
+    else
+    {
+      input_2 = g.nodes()->create<luci::CircleInput>();
+      fc_node->bias(input_2);
+    }
+
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    output->from(fc_node);
+  }
+
+public:
+  loco::Graph g;
+
+private:
+  luci::CircleFullyConnected *fc_node = nullptr;
+  luci::CircleInput *input_1 = nullptr;
+  luci::CircleInput *input_2 = nullptr;
+  luci::CircleConst *weights = nullptr;
+  luci::CircleConst *bias = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// Test fixture: INSTANCE_NORM graph whose beta is a CircleConst when
+// `make_valid` is true, or a CircleInput (non-const) when false.
+class SimpleInstanceNormGraph
+{
+public:
+  SimpleInstanceNormGraph(bool make_valid)
+  {
+    instance_norm_node = g.nodes()->create<luci::CircleInstanceNorm>();
+    input_1 = g.nodes()->create<luci::CircleInput>();
+    gamma = g.nodes()->create<luci::CircleConst>();
+
+    instance_norm_node->input(input_1);
+    instance_norm_node->gamma(gamma);
+
+    // Valid graphs get a const beta; invalid graphs wire a graph input instead
+    if (make_valid)
+    {
+      beta = g.nodes()->create<luci::CircleConst>();
+      instance_norm_node->beta(beta);
+    }
+    else
+    {
+      input_2 = g.nodes()->create<luci::CircleInput>();
+      instance_norm_node->beta(input_2);
+    }
+
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    output->from(instance_norm_node);
+  }
+
+public:
+  loco::Graph g;
+
+private:
+  luci::CircleInstanceNorm *instance_norm_node = nullptr;
+  luci::CircleInput *input_1 = nullptr;
+  luci::CircleInput *input_2 = nullptr;
+  luci::CircleConst *gamma = nullptr;
+  luci::CircleConst *beta = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// Test fixture: TRANSPOSE_CONV graph whose bias is a CircleConst when
+// `make_valid` is true, or a CircleInput (non-const) when false.
+class SimpleTransposeConvGraph
+{
+public:
+  SimpleTransposeConvGraph(bool make_valid)
+  {
+    transpose_conv = g.nodes()->create<luci::CircleTransposeConv>();
+    input_1 = g.nodes()->create<luci::CircleInput>();
+
+    input_sizes = g.nodes()->create<luci::CircleConst>();
+    filter = g.nodes()->create<luci::CircleConst>();
+
+    transpose_conv->outBackprop(input_1);
+    transpose_conv->filter(filter);
+    transpose_conv->inputSizes(input_sizes);
+
+    // Valid graphs get a const bias; invalid graphs wire a graph input instead
+    if (make_valid)
+    {
+      bias = g.nodes()->create<luci::CircleConst>();
+      transpose_conv->bias(bias);
+    }
+    else
+    {
+      input_2 = g.nodes()->create<luci::CircleInput>();
+      transpose_conv->bias(input_2);
+    }
+
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    output->from(transpose_conv);
+  }
+
+public:
+  loco::Graph g;
+
+private:
+  luci::CircleTransposeConv *transpose_conv = nullptr;
+  luci::CircleInput *input_1 = nullptr;
+  luci::CircleInput *input_2 = nullptr;
+  luci::CircleConst *input_sizes = nullptr;
+  luci::CircleConst *filter = nullptr;
+  luci::CircleConst *bias = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// Test fixture: PRELU graph whose alpha is a CircleConst when `make_valid`
+// is true, or a CircleInput (non-const) when false.
+class SimplePReluGraph
+{
+public:
+  SimplePReluGraph(bool make_valid)
+  {
+    prelu = g.nodes()->create<luci::CirclePRelu>();
+    input_1 = g.nodes()->create<luci::CircleInput>();
+
+    prelu->input(input_1);
+
+    // Valid graphs get a const alpha; invalid graphs wire a graph input instead
+    if (make_valid)
+    {
+      alpha = g.nodes()->create<luci::CircleConst>();
+      prelu->alpha(alpha);
+    }
+    else
+    {
+      input_2 = g.nodes()->create<luci::CircleInput>();
+      prelu->alpha(input_2);
+    }
+
+    output = g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    output->from(prelu);
+  }
+
+public:
+  loco::Graph g;
+
+private:
+  luci::CirclePRelu *prelu = nullptr;
+  luci::CircleInput *input_1 = nullptr;
+  luci::CircleInput *input_2 = nullptr;
+  luci::CircleConst *alpha = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+// The pass must expose a non-null name
+TEST(QuantizePreCheckerPassTest, name)
+{
+  luci::QuantizePreCheckerPass pass{};
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+// Test Conv2d
+TEST(QuantizePreCheckerPassTest, conv2d)
+{
+  // Const bias: the checker must accept the graph
+  SimpleConv2DGraph valid_graph(true);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, conv2d_NEG)
+{
+  // Non-const bias: the checker must throw
+  SimpleConv2DGraph invalid_graph(false);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test DepthwiseConv2d
+TEST(QuantizePreCheckerPassTest, depthwise_conv2d)
+{
+  // Const bias: the checker must accept the graph
+  SimpleDepthConv2DGraph valid_graph(true);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, depthwise_conv2d_NEG)
+{
+  // Non-const bias: the checker must throw
+  SimpleDepthConv2DGraph invalid_graph(false);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test FullyConnected
+TEST(QuantizePreCheckerPassTest, fully_connected)
+{
+  // Const bias: the checker must accept the graph
+  SimpleFCGraph valid_graph(true);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, fully_connected_NEG)
+{
+  // Non-const bias: the checker must throw
+  SimpleFCGraph invalid_graph(false);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test InstanceNorm
+TEST(QuantizePreCheckerPassTest, instance_norm)
+{
+  // Const beta: the checker must accept the graph
+  SimpleInstanceNormGraph valid_graph(true);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, instance_norm_NEG)
+{
+  // Non-const beta: the checker must throw
+  SimpleInstanceNormGraph invalid_graph(false);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test TransposeConv
+TEST(QuantizePreCheckerPassTest, transpose_conv)
+{
+  // Const bias: the checker must accept the graph
+  SimpleTransposeConvGraph valid_graph(true);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, transpose_conv_NEG)
+{
+  // Non-const bias: the checker must throw
+  SimpleTransposeConvGraph invalid_graph(false);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
+
+// Test PRelu
+TEST(QuantizePreCheckerPassTest, prelu)
+{
+  // Const alpha: the checker must accept the graph
+  SimplePReluGraph valid_graph(true);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_NO_THROW(checker.run(&valid_graph.g));
+}
+
+TEST(QuantizePreCheckerPassTest, prelu_NEG)
+{
+  // Non-const alpha: the checker must throw
+  SimplePReluGraph invalid_graph(false);
+
+  luci::QuantizePreCheckerPass checker{};
+
+  EXPECT_ANY_THROW(checker.run(&invalid_graph.g));
+}
diff --git a/compiler/luci/pass/src/QuantizeWeights.cpp b/compiler/luci/pass/src/QuantizeWeights.cpp
new file mode 100644 (file)
index 0000000..11322ab
--- /dev/null
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "QuantizeWeights.h"
+#include "QuantizationUtils.h"
+
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Log.h>
+
+#include <cmath>
+#include <vector>
+#include <functional>
+
+using namespace luci;
+
+namespace
+{
+
+using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
+
+void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
+{
+  loco::TensorShape dimension;
+  dimension.rank(4);
+  uint32_t indices[4] = {
+    0,
+  };
+
+  if (!get_channel_dim_index(node, dimension, channel_dim_index))
+  {
+    assert(false);
+    return;
+  }
+
+  for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
+  {
+    for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
+    {
+      for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
+      {
+        for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
+        {
+          func(indices, dimension, channel_dim_index);
+        }
+      }
+    }
+  }
+}
+
+void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
+                             std::vector<float> &scaling_factor, int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  const int32_t kMinScale = 0;
+  const int32_t kMaxScale = 255;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    quantized_values[cal_offset(dimension, indices)] =
+      static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
+  };
+
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(loco::DataType::U8);      // change the type of tensor
+  node->size<loco::DataType::U8>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
+void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
+                            int32_t &channel_dim_index)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+
+  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
+  const int32_t kMinScale = -kMaxScale;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  std::vector<int32_t> quantized_values(size);
+
+  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
+    int channel_idx = indices[channel_dim_index];
+    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
+    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
+    quantized_values[cal_offset(dimension, indices)] =
+      static_cast<int32_t>(std::round(data * scaling_factor_inv));
+  };
+
+  iterate_per_channel(node, channel_dim_index, quantize);
+
+  node->dtype(loco::DataType::S16);      // change the type of tensor
+  node->size<loco::DataType::S16>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<loco::DataType::S16>(i) =
+      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
+void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
+{
+  const int32_t kMinScale = 0;
+  const int32_t kMaxScale = 255;
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+
+  const float scaling_factor_inv = 1.0 / scaling_factor;
+  std::vector<int32_t> quantized_values(size);
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    auto data = node->at<loco::DataType::FLOAT32>(i);
+    quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
+  }
+
+  node->dtype(loco::DataType::U8);      // change the type of tensor
+  node->size<loco::DataType::U8>(size); // resize tensor
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
+  }
+}
+
+// Quantize const per channel
+//
+// The last dimension of const is the same as the dimension of channel
+// And the rest of the const dimensions should be 1
+// So, a 'single value' is quantized per channel
+//
+// Quantization spec (f: fp value, q: quantized value)
+//
+// uint8
+//   Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+//   Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
+//
+// int16
+//   Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
+//   Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
+void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
+{
+  assert(node->dtype() == loco::DataType::FLOAT32);
+  assert(node->rank() > 0);
+
+  for (uint32_t i = 0; i < node->rank() - 1; i++)
+  {
+    // Caller should call this function when the below condition is satisfied
+    if (node->dim(i).value() != 1)
+      throw std::runtime_error("Non-channel dimension of const node must be 1");
+  }
+
+  uint32_t size = node->size<loco::DataType::FLOAT32>();
+  assert(size == node->dim(node->rank() - 1).value());
+
+  auto quantparam = std::make_unique<CircleQuantParam>();
+  quantparam->quantized_dimension = node->rank() - 1;
+  std::vector<int32_t> quantized_data(size);
+
+  for (uint32_t i = 0; i < size; ++i)
+  {
+    auto data = node->at<loco::DataType::FLOAT32>(i);
+    if (quant_type == loco::DataType::U8)
+    {
+      if (data >= 0)
+      {
+        quantparam->scale.push_back(data);
+        quantparam->zerop.push_back(0);
+        quantized_data[i] = 1;
+      }
+      else
+      {
+        quantparam->scale.push_back(-data);
+        quantparam->zerop.push_back(1);
+        quantized_data[i] = 0;
+      }
+    }
+    else if (quant_type == loco::DataType::S16)
+    {
+      if (data >= 0)
+      {
+        quantparam->scale.push_back(data);
+        quantized_data[i] = 1;
+      }
+      else
+      {
+        quantparam->scale.push_back(-data);
+        quantized_data[i] = -1;
+      }
+      quantparam->zerop.push_back(0);
+    }
+  }
+  node->quantparam(std::move(quantparam));
+
+  switch (quant_type)
+  {
+    case loco::DataType::U8:
+      node->dtype(loco::DataType::U8);
+      node->size<loco::DataType::U8>(size);
+      for (uint32_t i = 0; i < size; ++i)
+      {
+        assert(quantized_data[i] == 0 || quantized_data[i] == 1);
+        node->at<loco::DataType::U8>(i) = quantized_data[i];
+      }
+      break;
+    case loco::DataType::S16:
+      node->dtype(loco::DataType::S16);
+      node->size<loco::DataType::S16>(size);
+      for (uint32_t i = 0; i < size; ++i)
+      {
+        assert(quantized_data[i] == -1 || quantized_data[i] == 1);
+        node->at<loco::DataType::S16>(i) = quantized_data[i];
+      }
+      break;
+    default:
+      throw std::runtime_error("Unsupported data type");
+  }
+}
+
+} // namespace
+
+namespace luci
+{
+
+void QuantizeWeights::quantize_weights(luci::CircleConst *weights)
+{
+  // Find min/max per channel-wise
+  if (granularity == QuantizationGranularity::ChannelWise)
+  {
+    auto quantparam = weights->quantparam();
+    if (quantparam == nullptr)
+    {
+      assert(false && "quantparam is nullptr");
+      return;
+    }
+
+    auto min = quantparam->min;
+    auto scaling_factor = quantparam->scale;
+    int32_t channel_dim_index = 0;
+
+    if (output_type == loco::DataType::U8)
+    {
+      asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
+    }
+    else
+    {
+      sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
+    }
+    quantparam->min.clear();
+    quantparam->max.clear();
+    quantparam->quantized_dimension = channel_dim_index;
+  }
+  // Find min/max per layer-wise
+  else
+  {
+    // Quantize using recorded quantparam
+    auto quantparam = weights->quantparam();
+    assert(quantparam != nullptr);
+    assert(quantparam->min.size() == 1);   // only support layer-wise quant
+    assert(quantparam->scale.size() == 1); // only support layer-wise quant
+    auto min = quantparam->min[0];
+    auto scaling_factor = quantparam->scale[0];
+    asym_wquant_per_layer(weights, min, scaling_factor);
+    quantparam->min.clear();
+    quantparam->max.clear();
+  }
+}
+void QuantizeWeights::visit(luci::CircleConv2D *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+  auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+  if (!is_quantized(weights))
+  {
+    auto new_weights = luci::clone(weights);
+    node->filter(new_weights);
+    quantize_weights(new_weights);
+  }
+}
+
+void QuantizeWeights::visit(luci::CircleDepthwiseConv2D *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+  auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+  if (!is_quantized(weights))
+  {
+    auto new_weights = luci::clone(weights);
+    node->filter(new_weights);
+    quantize_weights(new_weights);
+  }
+}
+
+void QuantizeWeights::visit(luci::CircleInstanceNorm *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+  auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
+  auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
+
+  if (!is_quantized(gamma))
+  {
+    assert(gamma->dtype() == loco::DataType::FLOAT32);
+    auto new_gamma = luci::clone(gamma);
+    if (granularity == QuantizationGranularity::LayerWise)
+      quant_const(new_gamma, output_type);
+    else if (granularity == QuantizationGranularity::ChannelWise)
+      quant_const_per_channel(new_gamma, output_type);
+    node->gamma(new_gamma);
+  }
+  if (!is_quantized(beta))
+  {
+    assert(beta->dtype() == loco::DataType::FLOAT32);
+    auto new_beta = luci::clone(beta);
+    if (granularity == QuantizationGranularity::LayerWise)
+      quant_const(new_beta, output_type);
+    else if (granularity == QuantizationGranularity::ChannelWise)
+      quant_const_per_channel(new_beta, output_type);
+    node->beta(new_beta);
+  }
+}
+
+void QuantizeWeights::visit(luci::CirclePRelu *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+  auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
+
+  if (!is_quantized(alpha))
+  {
+    assert(alpha->dtype() == loco::DataType::FLOAT32);
+    auto new_alpha = luci::clone(alpha);
+    if (granularity == QuantizationGranularity::LayerWise)
+      quant_const(new_alpha, output_type);
+    else if (granularity == QuantizationGranularity::ChannelWise)
+      quant_const_per_channel(new_alpha, output_type);
+    node->alpha(new_alpha);
+  }
+}
+
+void QuantizeWeights::visit(luci::CircleTransposeConv *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+  auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
+  if (!is_quantized(weights))
+  {
+    auto new_weights = luci::clone(weights);
+    node->filter(new_weights);
+    quantize_weights(new_weights);
+  }
+}
+
+void QuantizeWeights::visit(luci::CircleFullyConnected *node)
+{
+  LOGGER(l);
+  INFO(l) << "QuantizeWeights QuantizeWeights::visit node: " << node->name() << std::endl;
+
+  auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
+  if (!is_quantized(weights))
+  {
+    auto new_weights = luci::clone(weights);
+    node->weights(new_weights);
+    quantize_weights(new_weights);
+  }
+}
+
+void QuantizeWeights::visit(luci::CircleNode *) {}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/QuantizeWeights.h b/compiler/luci/pass/src/QuantizeWeights.h
new file mode 100644 (file)
index 0000000..f62cd40
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_QUANTIZE_WEIGHTS_H__
+#define __LUCI_QUANTIZE_WEIGHTS_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief QuantizeWeights quantizes tensors for weights
+ * @details Find min/max values on the fly and then quantize
+ */
+struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<void>
+{
+  QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
+    : input_type(input), output_type(output), granularity(gr)
+  {
+  }
+
+  loco::DataType input_type;
+  loco::DataType output_type;
+  QuantizationGranularity granularity;
+
+private:
+  void quantize_weights(luci::CircleConst *weights);
+
+  void visit(luci::CircleConv2D *node);
+  void visit(luci::CircleDepthwiseConv2D *node);
+  void visit(luci::CircleInstanceNorm *node);
+  void visit(luci::CirclePRelu *node);
+  void visit(luci::CircleTransposeConv *node);
+  void visit(luci::CircleFullyConnected *node);
+  void visit(luci::CircleNode *);
+};
+
+} // namespace luci
+
+#endif // __LUCI_QUANTIZE_WEIGHTS_H__
index c3552ec5267beefd02e1fb432250fe985feef53b..d9a9d4db74a069993660e06c0301527e4e1c7b99 100644 (file)
  */
 
 #include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/PropagateQParamForwardPass.h"
+#include "luci/Pass/PropagateQParamBackwardPass.h"
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+#include "QuantizeActivation.h"
+#include "QuantizeWeights.h"
+#include "QuantizeBias.h"
 #include "QuantizationUtils.h"
+#include "ProgressReporter.h"
+#include "helpers/LayerInfoMap.h"
 
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
 #include <luci/Service/Nodes/CircleConst.h>
 #include <luci/Profile/CircleNodeOrigin.h>
 #include <luci/Log.h>
+#include <logo/Phase.h>
 
 #include <oops/UserExn.h>
 
 #include <iostream>
 #include <cmath>
-#include <functional>
 
 namespace
 {
 
 using namespace luci;
-using IterFunc = std::function<void(uint32_t *, loco::TensorShape &, int32_t)>;
-
-void iterate_per_channel(CircleConst *node, int32_t &channel_dim_index, IterFunc func)
-{
-  loco::TensorShape dimension;
-  dimension.rank(4);
-  uint32_t indices[4] = {
-    0,
-  };
-
-  if (!get_channel_dim_index(node, dimension, channel_dim_index))
-  {
-    assert(false);
-    return;
-  }
-
-  for (indices[0] = 0; indices[0] < dimension.dim(0).value(); indices[0]++)
-  {
-    for (indices[1] = 0; indices[1] < dimension.dim(1).value(); indices[1]++)
-    {
-      for (indices[2] = 0; indices[2] < dimension.dim(2).value(); indices[2]++)
-      {
-        for (indices[3] = 0; indices[3] < dimension.dim(3).value(); indices[3]++)
-        {
-          func(indices, dimension, channel_dim_index);
-        }
-      }
-    }
-  }
-}
-
 // Create a Quantize Op whose
 // dtype is out_type
 // shape is the same with node
@@ -80,7 +57,17 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
   quantize->shape_status(luci::ShapeStatus::VALID);
 
   auto qparam = node->quantparam();
-  assert(qparam);                  // FIX_CALLER_UNLESS
+  assert(qparam); // FIX_CALLER_UNLESS
+
+  auto qtype = luci::activation_qtype(node);
+  if (qtype == ActivationQType::PreDefinedValue)
+  {
+    quantize->quantparam(luci::make_predefined_qparam(node->opcode(), out_type));
+    return quantize;
+  }
+
+  assert(qtype == ActivationQType::MinMax or qtype == ActivationQType::IntScale);
+
   assert(qparam->min.size() == 1); // FIX_CALLER_UNLESS
   assert(qparam->max.size() == 1); // FIX_CALLER_UNLESS
   auto min = qparam->min[0];
@@ -104,9 +91,17 @@ luci::CircleQuantize *create_quantize_op(luci::CircleNode *node, loco::DataType
   auto quantparam = std::make_unique<CircleQuantParam>();
   quantparam->scale.push_back(scaling_factor);
   quantparam->zerop.push_back(zp);
+  // Save original min/max (not nudged_min/max). Nudged min/max
+  // is different from the real min/max values, causing wrong
+  // qparam when quantization dtype is changed.
+  quantparam->min.push_back(min);
+  quantparam->max.push_back(max);
 
   quantize->quantparam(std::move(quantparam));
 
+  if (qtype == ActivationQType::IntScale)
+    set_int_scale(quantize);
+
   return quantize;
 }
 
@@ -118,1412 +113,232 @@ namespace luci
 namespace
 {
 
-// Create a new const node from an existing node.
-// The new node has the following characteristics
-// type: T
-// shape: same with 'node' (given as an argument)
-// buffer size: 'size' (given as an argument)
-// Note that contents are not filled in this function.
-template <loco::DataType T>
-luci::CircleConst *create_empty_const_from(luci::CircleConst *node, uint32_t size)
-{
-  auto new_node = node->graph()->nodes()->create<CircleConst>();
-  // TODO: We don't have any naming convention for quantized nodes yet.
-  //       Fix this when we have one.
-  new_node->name(node->name());
-  new_node->dtype(T);
-  new_node->rank(node->rank());
-  for (uint32_t i = 0; i < node->rank(); i++)
-    new_node->dim(i).set(node->dim(i).value());
-
-  new_node->size<T>(size);
-  new_node->shape_status(luci::ShapeStatus::VALID);
-
-  return new_node;
-}
-
-void overwrite_quantparam(luci::CircleNode *source, luci::CircleNode *target)
-{
-  auto source_qparam = source->quantparam();
-  if (source_qparam == nullptr)
-    throw std::runtime_error("source quantparam is not found during overwrite");
-
-  auto target_qparam = target->quantparam();
-  if (target_qparam == nullptr)
-  {
-    auto quantparam = std::make_unique<CircleQuantParam>();
-    target->quantparam(std::move(quantparam));
-    target_qparam = target->quantparam();
-
-    if (target_qparam == nullptr)
-      throw std::runtime_error("Creating new quant param failed");
-  }
-  target_qparam->min = source_qparam->min;
-  target_qparam->max = source_qparam->max;
-  target_qparam->scale = source_qparam->scale;
-  target_qparam->zerop = source_qparam->zerop;
-  target_qparam->quantized_dimension = source_qparam->quantized_dimension;
-}
-
-void quant_const_values(luci::CircleConst *const_node, float scaling_factor, float zerop,
-                        loco::DataType quant_type)
-{
-  uint32_t size = const_node->size<loco::DataType::FLOAT32>();
-
-  const float scaling_factor_inv = 1.0 / scaling_factor;
-  std::vector<int32_t> quantized_values(size);
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    auto data = static_cast<double>(const_node->at<loco::DataType::FLOAT32>(i));
-    double quantized_float = std::round(data * scaling_factor_inv) + zerop;
-    constexpr auto int_max = static_cast<double>(std::numeric_limits<int32_t>::max());
-    constexpr auto int_min = static_cast<double>(std::numeric_limits<int32_t>::min());
-    quantized_float = std::min(int_max, std::max(int_min, quantized_float));
-
-    quantized_values[i] = static_cast<int32_t>(quantized_float);
-  }
-
-  switch (quant_type)
-  {
-    case loco::DataType::U8:
-      const_node->dtype(loco::DataType::U8);      // change the type of tensor
-      const_node->size<loco::DataType::U8>(size); // resize tensor
-      for (uint32_t i = 0; i < size; ++i)
-        const_node->at<loco::DataType::U8>(i) = std::min(255, std::max(0, quantized_values[i]));
-      break;
-    case loco::DataType::S16:
-      assert(zerop == 0);
-      const_node->dtype(loco::DataType::S16);      // change the type of tensor
-      const_node->size<loco::DataType::S16>(size); // resize tensor
-      for (uint32_t i = 0; i < size; ++i)
-        const_node->at<loco::DataType::S16>(i) =
-          std::min(32767, std::max(-32767, quantized_values[i]));
-      break;
-    default:
-      throw std::runtime_error("Unsupported data type");
-  }
-}
-
-// Quantize const per channel
-//
-// The last dimension of const is the same as the dimension of channel
-// And the rest of the const dimensions should be 1
-// So, a 'single value' is quantized per channel
-//
-// Quantization spec (f: fp value, q: quantized value)
-//
-// uint8
-//   Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
-//   Negative f: f = (-f) * (q - 1) [q = 0, scale = -f, zp = 1]
-//
-// int16
-//   Positive f: f = f * (q - 0) [q = 1, scale = f, zp = 0]
-//   Negative f: f = (-f) * (q - 0) [q = -1, scale = -f, zp = 0]
-void quant_const_per_channel(CircleConst *node, loco::DataType quant_type)
-{
-  assert(node->dtype() == loco::DataType::FLOAT32);
-  assert(node->rank() > 0);
-
-  for (uint32_t i = 0; i < node->rank() - 1; i++)
-  {
-    // Caller should call this function when the below condition is satisfied
-    if (node->dim(i).value() != 1)
-      throw std::runtime_error("Non-channel dimension of const node must be 1");
-  }
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-  assert(size == node->dim(node->rank() - 1).value());
-
-  auto quantparam = std::make_unique<CircleQuantParam>();
-  quantparam->quantized_dimension = node->rank() - 1;
-  std::vector<int32_t> quantized_data(size);
-
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    auto data = node->at<loco::DataType::FLOAT32>(i);
-    if (quant_type == loco::DataType::U8)
-    {
-      if (data >= 0)
-      {
-        quantparam->scale.push_back(data);
-        quantparam->zerop.push_back(0);
-        quantized_data[i] = 1;
-      }
-      else
-      {
-        quantparam->scale.push_back(-data);
-        quantparam->zerop.push_back(1);
-        quantized_data[i] = 0;
-      }
-    }
-    else if (quant_type == loco::DataType::S16)
-    {
-      if (data >= 0)
-      {
-        quantparam->scale.push_back(data);
-        quantized_data[i] = 1;
-      }
-      else
-      {
-        quantparam->scale.push_back(-data);
-        quantized_data[i] = -1;
-      }
-      quantparam->zerop.push_back(0);
-    }
-  }
-  node->quantparam(std::move(quantparam));
-
-  switch (quant_type)
-  {
-    case loco::DataType::U8:
-      node->dtype(loco::DataType::U8);
-      node->size<loco::DataType::U8>(size);
-      for (uint32_t i = 0; i < size; ++i)
-      {
-        assert(quantized_data[i] == 0 || quantized_data[i] == 1);
-        node->at<loco::DataType::U8>(i) = quantized_data[i];
-      }
-      break;
-    case loco::DataType::S16:
-      node->dtype(loco::DataType::S16);
-      node->size<loco::DataType::S16>(size);
-      for (uint32_t i = 0; i < size; ++i)
-      {
-        assert(quantized_data[i] == -1 || quantized_data[i] == 1);
-        node->at<loco::DataType::S16>(i) = quantized_data[i];
-      }
-      break;
-    default:
-      throw std::runtime_error("Unsupported data type");
-  }
-}
-
-void quant_const(CircleConst *node, loco::DataType quant_type)
-{
-  assert(node->dtype() == loco::DataType::FLOAT32);
-
-  float min = std::numeric_limits<float>::max();
-  float max = std::numeric_limits<float>::lowest();
-  for (uint32_t i = 0; i < node->size<loco::DataType::FLOAT32>(); i++)
-  {
-    auto data = node->at<loco::DataType::FLOAT32>(i);
-    min = data < min ? data : min;
-    max = data > max ? data : max;
-  }
-
-  float scaling_factor{0.0};
-  int64_t zp{0};
-  float nudged_min{0.0};
-  float nudged_max{0.0};
-
-  switch (quant_type)
-  {
-    case loco::DataType::U8:
-      asymmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
-                                              nudged_max);
-      break;
-    case loco::DataType::S16:
-      symmetric_wquant_with_minmax_per_layer(node, min, max, scaling_factor, zp, nudged_min,
-                                             nudged_max);
-      break;
-    default:
-      throw std::runtime_error("Unsupported data type");
-  }
-
-  auto quantparam = std::make_unique<CircleQuantParam>();
-  quantparam->scale.push_back(scaling_factor);
-  quantparam->zerop.push_back(zp);
-  node->quantparam(std::move(quantparam));
-}
-
-// Check if the node is the bias of Conv2D, DepthwiseConv2D, FullyConnected, or TransposeConv layer
-// Returns a list of <input, weights, output> vectors for the above operators.
-// Note that it returns a 'list' because bias can be used by multiple operators.
-std::vector<std::vector<loco::Node *>> get_input_weight_output_of_bias(CircleNode *node)
-{
-  std::vector<std::vector<loco::Node *>> result;
-  auto circle_const = dynamic_cast<CircleConst *>(node);
-  if (circle_const == nullptr)
-    return result;
-
-  auto succs = loco::succs(node);
-
-  for (auto out : succs)
-  {
-    auto conv = dynamic_cast<CircleConv2D *>(out);
-    if (conv != nullptr && conv->bias() == circle_const)
-    {
-      assert(conv->input() != nullptr);
-      assert(conv->filter() != nullptr);
-      result.push_back({conv->input(), conv->filter(), conv});
-      continue;
-    }
-    auto dw_conv = dynamic_cast<CircleDepthwiseConv2D *>(out);
-    if (dw_conv != nullptr && dw_conv->bias() == circle_const)
-    {
-      assert(dw_conv->input() != nullptr);
-      assert(dw_conv->filter() != nullptr);
-      result.push_back({dw_conv->input(), dw_conv->filter(), dw_conv});
-      continue;
-    }
-    auto fc = dynamic_cast<CircleFullyConnected *>(out);
-    if (fc != nullptr && fc->bias() == circle_const)
-    {
-      assert(fc->input() != nullptr);
-      assert(fc->weights() != nullptr);
-      result.push_back({fc->input(), fc->weights(), fc});
-      continue;
-    }
-    auto tconv = dynamic_cast<CircleTransposeConv *>(out);
-    if (tconv != nullptr && tconv->bias() == circle_const)
-    {
-      assert(tconv->outBackprop() != nullptr);
-      assert(tconv->filter() != nullptr);
-      result.push_back({tconv->outBackprop(), tconv->filter(), tconv});
-      continue;
-    }
-  }
-  return result;
-}
-
-CircleConst *asym_quant_bias_per_layer(CircleConst *node, float input_scale, float weight_scale,
-                                       float *scaling_factor, int64_t *zp)
-{
-  float scale = input_scale * weight_scale;
-  const float scaling_factor_inv = (scale == 0) ? 0 : 1.0 / scale;
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-  std::vector<int32_t> quantized_values(size);
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    quantized_values[i] =
-      static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
-  }
-
-  auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
-
-  const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
-  const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    new_bias->at<loco::DataType::S32>(i) =
-      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
-  }
-  *scaling_factor = scale;
-  *zp = 0;
-
-  return new_bias;
-}
-
-CircleConst *quant_bias_per_channel(CircleConst *node, float input_scale,
-                                    std::vector<float> &weight_scale,
-                                    std::vector<float> &scaling_factor, std::vector<int64_t> &zp)
-{
-  float scaling_factor_inv{0};
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-  std::vector<int32_t> quantized_values(size);
-
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    scaling_factor[i] = input_scale * weight_scale[i];
-    scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
-    quantized_values[i] =
-      static_cast<int32_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
-    zp[i] = 0;
-  }
-
-  auto new_bias = create_empty_const_from<loco::DataType::S32>(node, size);
-
-  const int32_t kMinScale = std::numeric_limits<int32_t>::lowest();
-  const int32_t kMaxScale = std::numeric_limits<int32_t>::max();
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    new_bias->at<loco::DataType::S32>(i) =
-      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
-  }
-
-  return new_bias;
-}
-
-CircleConst *int16_quant_bias_per_channel(CircleConst *node, float input_scale,
-                                          std::vector<float> &weight_scale,
-                                          std::vector<float> &scaling_factor,
-                                          std::vector<int64_t> &zp)
-{
-  float scaling_factor_inv{0};
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-  std::vector<int64_t> quantized_values(size);
-
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    scaling_factor[i] = input_scale * weight_scale[i];
-    scaling_factor_inv = (scaling_factor[i] == 0) ? 0 : 1.0 / scaling_factor[i];
-    quantized_values[i] =
-      static_cast<int64_t>(std::round(node->at<loco::DataType::FLOAT32>(i) * scaling_factor_inv));
-    zp[i] = 0;
-  }
-
-  auto new_bias = create_empty_const_from<loco::DataType::S64>(node, size);
-
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    new_bias->at<loco::DataType::S64>(i) = quantized_values[i];
-  }
-
-  return new_bias;
-}
-
-bool has_min_max(const CircleNode *node)
-{
-  return node->quantparam() && !node->quantparam()->min.empty() && !node->quantparam()->max.empty();
-}
-
-void sym_wquant_per_channel(CircleConst *node, std::vector<float> &scaling_factor,
-                            int32_t &channel_dim_index)
-{
-  assert(node->dtype() == loco::DataType::FLOAT32);
-
-  const int32_t kMaxScale = std::numeric_limits<int16_t>::max();
-  const int32_t kMinScale = -kMaxScale;
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-  std::vector<int32_t> quantized_values(size);
-
-  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
-    int channel_idx = indices[channel_dim_index];
-    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
-    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
-    quantized_values[cal_offset(dimension, indices)] =
-      static_cast<int32_t>(std::round(data * scaling_factor_inv));
-  };
-
-  iterate_per_channel(node, channel_dim_index, quantize);
-
-  node->dtype(loco::DataType::S16);      // change the type of tensor
-  node->size<loco::DataType::S16>(size); // resize tensor
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    node->at<loco::DataType::S16>(i) =
-      std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
-  }
-}
-
-void asym_wquant_per_channel(CircleConst *node, std::vector<float> &min,
-                             std::vector<float> &scaling_factor, int32_t &channel_dim_index)
-{
-  assert(node->dtype() == loco::DataType::FLOAT32);
-
-  const int32_t kMinScale = 0;
-  const int32_t kMaxScale = 255;
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-  std::vector<int32_t> quantized_values(size);
-
-  auto quantize = [&](uint32_t *indices, loco::TensorShape &dimension, int32_t channel_dim_index) {
-    int channel_idx = indices[channel_dim_index];
-    const float scaling_factor_inv = 1.0 / scaling_factor[channel_idx];
-    auto data = node->at<loco::DataType::FLOAT32>(cal_offset(dimension, indices));
-    quantized_values[cal_offset(dimension, indices)] =
-      static_cast<int32_t>(std::round((data - min[channel_idx]) * scaling_factor_inv));
-  };
-
-  iterate_per_channel(node, channel_dim_index, quantize);
-
-  node->dtype(loco::DataType::U8);      // change the type of tensor
-  node->size<loco::DataType::U8>(size); // resize tensor
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
-  }
-}
-
-void asym_wquant_per_layer(CircleConst *node, float min, float scaling_factor)
-{
-  const int32_t kMinScale = 0;
-  const int32_t kMaxScale = 255;
-
-  uint32_t size = node->size<loco::DataType::FLOAT32>();
-
-  const float scaling_factor_inv = 1.0 / scaling_factor;
-  std::vector<int32_t> quantized_values(size);
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    auto data = node->at<loco::DataType::FLOAT32>(i);
-    quantized_values[i] = static_cast<int32_t>(std::round((data - min) * scaling_factor_inv));
-  }
-
-  node->dtype(loco::DataType::U8);      // change the type of tensor
-  node->size<loco::DataType::U8>(size); // resize tensor
-  for (uint32_t i = 0; i < size; ++i)
-  {
-    node->at<loco::DataType::U8>(i) = std::min(kMaxScale, std::max(kMinScale, quantized_values[i]));
-  }
-}
-
-void set_bias(luci::CircleNode *node, luci::CircleConst *bias)
-{
-  if (auto conv = dynamic_cast<CircleConv2D *>(node))
-    conv->bias(bias);
-  else if (auto dconv = dynamic_cast<CircleDepthwiseConv2D *>(node))
-    dconv->bias(bias);
-  else if (auto tconv = dynamic_cast<CircleTransposeConv *>(node))
-    tconv->bias(bias);
-  else if (auto fc = dynamic_cast<CircleFullyConnected *>(node))
-    fc->bias(bias);
-  else
-    throw std::runtime_error("Only convolution, depthwise convolution, transposed convolution, and "
-                             "fully-connected layer have bias");
-}
-
-void set_act_qparam(luci::CircleNode *node, float scale, int64_t zp)
-{
-  assert(node);               // FIX_CALLER_UNLESS
-  assert(node->quantparam()); // FIX_CALLER_UNLESS
-
-  auto qparam = node->quantparam();
-  assert(qparam->scale.size() == 1); // FIX_CALLER_UNLESS
-  assert(qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
-  qparam->scale[0] = scale;
-  qparam->zerop[0] = zp;
-}
-
-/**
- * @brief Manually set scale/zp of output tensor of special Ops
- */
-struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<void>
-{
-  QuantizeSpecialActivation(loco::DataType input, loco::DataType output)
-    : input_type(input), output_type(output)
-  {
-  }
-
-  loco::DataType input_type;
-  loco::DataType output_type;
-
-  void visit(luci::CircleNode *)
-  {
-    // Do nothing by default
-  }
-
-  void visit(luci::CircleLogistic *node)
-  {
-    if (output_type == loco::DataType::U8)
-      set_act_qparam(node, 1.0f / 256.0f, 0);
-    else
-    {
-      assert(output_type == loco::DataType::S16);
-      set_act_qparam(node, 1.0f / 32768.0f, 0);
-    }
-  }
-
-  void visit(luci::CircleTanh *node)
-  {
-    if (output_type == loco::DataType::U8)
-      set_act_qparam(node, 2.0f / 256.0f, 128);
-    else
-    {
-      assert(output_type == loco::DataType::S16);
-      set_act_qparam(node, 1.0f / 32768.0f, 0);
-    }
-  }
-
-  void visit(luci::CircleStridedSlice *node)
-  {
-    auto input = loco::must_cast<luci::CircleNode *>(node->input());
-    auto i_qparam = input->quantparam();
-    assert(i_qparam);
-    assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
-    assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
-    auto i_scale = i_qparam->scale[0];
-    auto i_zp = i_qparam->zerop[0];
-
-    set_act_qparam(node, i_scale, i_zp);
-  }
-
-  void visit(luci::CircleSplitOut *node)
-  {
-    auto split = loco::must_cast<luci::CircleSplit *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(split->input());
-    auto i_qparam = input->quantparam();
-    assert(i_qparam);
-    assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
-    assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
-    auto i_scale = i_qparam->scale[0];
-    auto i_zp = i_qparam->zerop[0];
-
-    set_act_qparam(node, i_scale, i_zp);
-  }
-
-  void visit(luci::CircleSplitVOut *node)
-  {
-    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
-    auto i_qparam = input->quantparam();
-    assert(i_qparam);
-    assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
-    assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
-    auto i_scale = i_qparam->scale[0];
-    auto i_zp = i_qparam->zerop[0];
-
-    set_act_qparam(node, i_scale, i_zp);
-  }
-
-  void visit(luci::CircleUnpackOut *node)
-  {
-    auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(unpack->value());
-    auto i_qparam = input->quantparam();
-    assert(i_qparam);
-    assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
-    assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
-    auto i_scale = i_qparam->scale[0];
-    auto i_zp = i_qparam->zerop[0];
-
-    set_act_qparam(node, i_scale, i_zp);
-  }
-
-  // TODO Move Softmax, Floor, Ceil from QuantizeActivation to here
-};
-
 /**
- * @brief QuantizeActivation quantizes tensors for activations
- * @details Quantize using recorded min/max values
+ * Insert Quantize operator for mixed-precision quantization
+ * 1. Before input feature map (only for non-const)
+ * 2. After output feature map
+ *
+ * For example, if default_dtype = U8 and op_dtype = S16,
+ * 1. Quantize Op for U8->S16 is inserted before ifm
+ * 2. Quantize Op for S16->U8 is inserted after ofm
+ *
+ * Why not insert Quantize Op for const ifm?
+ * We quantize const tensor at once to preserve precision.
+ * For example, if default dtype = U8, op_dtype = S16, and op is CONV2D,
+ * We directly quantize weights to 16 bits, not 8->16 bits.
  */
-struct QuantizeActivation final : public luci::CircleNodeMutableVisitor<bool>
+struct InsertQuantizeOp final : public luci::CircleNodeMutableVisitor<void>
 {
-  QuantizeActivation(loco::DataType input, loco::DataType output)
-    : input_type(input), output_type(output)
+  InsertQuantizeOp(loco::DataType default_dtype, loco::DataType op_dtype)
+    : _default_dtype(default_dtype), _op_dtype(op_dtype)
   {
+    assert(default_dtype != op_dtype); // FIX_CALLER_UNLESS
   }
 
-  loco::DataType input_type;
-  loco::DataType output_type;
+private:
+  loco::DataType _default_dtype;
+  loco::DataType _op_dtype;
 
-  // Quantize input tensors of each node
-  bool visit(luci::CircleNode *node)
+private:
+  luci::CircleQuantize *create_in_quantize(loco::Node *in, loco::Node *origin)
+  {
+    auto input = loco::must_cast<luci::CircleNode *>(in);
+    if (input->opcode() == luci::CircleOpcode::CIRCLECONST)
+      return nullptr;
+
+    auto input_quant = create_quantize_op(input, _op_dtype);
+    input_quant->input(input);
+    auto origin_node = loco::must_cast<luci::CircleNode *>(origin);
+    luci::add_origin(input_quant, luci::get_origin(origin_node));
+    return input_quant;
+  }
+
+  void insert_out_quantize(loco::Node *node)
+  {
+    auto output = loco::must_cast<luci::CircleNode *>(node);
+    assert(output->opcode() != luci::CircleOpcode::CIRCLECONST); // FIX_CALLER_UNLESS
+    auto output_quant = create_quantize_op(output, _default_dtype);
+
+    luci::add_origin(output_quant, luci::get_origin(output));
+    loco::replace(node).with(output_quant);
+    output_quant->input(node);
+  }
+
+// INPUT_NAME is the only activation of NODE
+#define INSERT_QUANTIZE_TO_UNARY_OP(NODE, INPUT_NAME)                    \
+  void visit(NODE *node)                                                 \
+  {                                                                      \
+    if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node)) \
+      node->INPUT_NAME(input_quant);                                     \
+                                                                         \
+    insert_out_quantize(node);                                           \
+  }
+
+// INPUT_NAME is the only activation of NODE
+#define INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(NODE, INPUT_NAME, OUT_NAME) \
+  void visit(NODE *node)                                                     \
+  {                                                                          \
+    if (auto input_quant = create_in_quantize(node->INPUT_NAME(), node))     \
+      node->INPUT_NAME(input_quant);                                         \
+                                                                             \
+    auto out_nodes = loco::succs(node);                                      \
+    for (auto out_node : out_nodes)                                          \
+    {                                                                        \
+      auto out_circle = loco::must_cast<OUT_NAME *>(out_node);               \
+      insert_out_quantize(out_circle);                                       \
+    }                                                                        \
+  }
+
+// INPUT_NAME1 and INPUT_NAME2 are the only activations of NODE
+#define INSERT_QUANTIZE_TO_BINARY_OP(NODE, INPUT_NAME1, INPUT_NAME2)       \
+  void visit(NODE *node)                                                   \
+  {                                                                        \
+    if (auto input1_quant = create_in_quantize(node->INPUT_NAME1(), node)) \
+      node->INPUT_NAME1(input1_quant);                                     \
+                                                                           \
+    if (auto input2_quant = create_in_quantize(node->INPUT_NAME2(), node)) \
+      node->INPUT_NAME2(input2_quant);                                     \
+                                                                           \
+    insert_out_quantize(node);                                             \
+  }
+
+  // Default behavior (NYI)
+  void visit(luci::CircleNode *node)
+  {
+    throw std::runtime_error("Unsupported Op for mixed-precision quantization. Layer name: " +
+                             node->name());
+  }
+
+  // Skip output layer
+  void visit(luci::CircleOutput *) {}
+  void visit(luci::CircleSplitVOut *) {}
+  void visit(luci::CircleSplitOut *) {}
+  void visit(luci::CircleTopKV2Out *) {}
+  void visit(luci::CircleUniqueOut *) {}
+  void visit(luci::CircleUnpackOut *) {}
+
+  // Ops that receive a single activation as an input
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleAveragePool2D, value)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleBatchToSpaceND, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleConv2D, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthToSpace, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleDepthwiseConv2D, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleElu, features)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleExp, x)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFloor, x)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleFullyConnected, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleGather, params)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleInstanceNorm, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLocalResponseNormalization, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleLogistic, x)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMaxPool2D, value)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMean, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleMirrorPad, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePad, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePadV2, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CirclePRelu, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceProd, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMax, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReduceMin, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRelu, features)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReshape, tensor)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeBilinear, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleResizeNearestNeighbor, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleReverseSequence, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleRsqrt, x)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSlice, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSoftmax, logits)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToBatchND, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSpaceToDepth, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSqrt, x)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleStridedSlice, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleSum, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTanh, x)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTile, input)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTranspose, a)
+  INSERT_QUANTIZE_TO_UNARY_OP(luci::CircleTransposeConv, outBackprop)
+
+  // Ops that receive two activations as inputs
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleAdd, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleBatchMatMul, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleDiv, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleFloorDiv, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMaximum, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMinimum, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleMul, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleOneHot, on_value, off_value)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CirclePow, x, y)
+  INSERT_QUANTIZE_TO_BINARY_OP(luci::CircleSub, x, y)
+
+  // Multiple-output ops that receive one activation as inputs
+  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplit, input, luci::CircleSplitOut)
+  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleSplitV, input, luci::CircleSplitVOut)
+  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleTopKV2, input, luci::CircleTopKV2Out)
+  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnique, input, luci::CircleUniqueOut)
+  INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP(luci::CircleUnpack, value, luci::CircleUnpackOut)
+
+  // AddN has arbitrary number of inputs
+  void visit(luci::CircleAddN *node)
   {
-    LOGGER(l);
-    INFO(l) << "QuantizeActivation visit node: " << node->name() << std::endl;
     auto arity = node->arity();
     for (uint32_t i = 0; i < arity; i++)
     {
-      auto input_node = node->arg(i);
-      auto circle_node = loco::must_cast<luci::CircleNode *>(input_node);
-
-      // Check if this is already quantized
-      if (is_quantized(circle_node))
-        continue;
-
-      // Check if this is bias (bias is quantized later)
-      auto iwo = get_input_weight_output_of_bias(circle_node);
-      if (iwo.size() > 0)
-        continue;
-
-      // Check if this is bool type (bool type is not quantized)
-      if (circle_node->dtype() == loco::DataType::BOOL)
-        continue;
-
-      // Check if this is activation
-      // We assume min/max are recorded only for activations
-      if (has_min_max(circle_node) && !is_weights(circle_node))
-      {
-        // Quantize using recorded min/max
-        auto quantparam = circle_node->quantparam();
-        assert(quantparam);
-        assert(quantparam->min.size() == 1); // only support layer-wise quant
-        assert(quantparam->max.size() == 1); // only support layer-wise quant
-        auto min = quantparam->min[0];
-        auto max = quantparam->max[0];
-
-        // Special values
-        if (circle_node->opcode() == luci::CircleOpcode::SOFTMAX)
-        {
-          min = 0.0f;
-          max = 1.0f;
-        }
-
-        float scaling_factor{0};
-        int64_t zp{0};
-        float nudged_min{0};
-        float nudged_max{0};
-
-        if (output_type == loco::DataType::U8)
-        {
-          compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
-          circle_node->dtype(loco::DataType::U8);
-        }
-        else
-        {
-          compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
-          circle_node->dtype(loco::DataType::S16);
-        }
-
-        // Nodes fused with activation functions which need special quantization
-        auto fused_act_node =
-          dynamic_cast<CircleNodeMixin<CircleNodeTrait::FusedActFunc> *>(circle_node);
-        if (fused_act_node != nullptr &&
-            fused_act_node->fusedActivationFunction() == FusedActFunc::TANH)
-        {
-          if (output_type == loco::DataType::U8)
-          {
-            scaling_factor = 2.0f / 256.0f;
-            zp = 128;
-          }
-          else
-          {
-            assert(output_type == loco::DataType::S16);
-            scaling_factor = 1.0f / 32768.0f;
-            zp = 0;
-          }
-        }
-
-        // The output of these Ops should be integer, so scale should be integer
-        // TODO Handle cases where the integer scale needs to be propagated
-        if (circle_node->opcode() == CircleOpcode::FLOOR ||
-            circle_node->opcode() == CircleOpcode::FLOOR_DIV ||
-            circle_node->opcode() == CircleOpcode::FLOOR_MOD ||
-            circle_node->opcode() == CircleOpcode::CEIL)
-        {
-          assert(scaling_factor >= 0); // FIX_ME_UNLESS
-          scaling_factor = scaling_factor < 1 ? 1.0f : std::round(scaling_factor);
-        }
-
-        circle_node->quantparam()->scale.push_back(scaling_factor);
-        circle_node->quantparam()->zerop.push_back(zp);
-      }
-      // Fix special attributes
-      if (circle_node->opcode() == luci::CircleOpcode::CAST)
-      {
-        auto *cast = loco::must_cast<luci::CircleCast *>(circle_node);
-        auto *cast_input = loco::must_cast<luci::CircleNode *>(cast->x());
-
-        // make sure that cast_input is already quantized
-        assert(cast_input->dtype() != loco::DataType::FLOAT32);
-        cast->in_data_type(cast_input->dtype());
-        cast->out_data_type(cast->dtype());
-      }
-    }
-    return false;
-  }
-};
-
-struct QuantizeBias final : public luci::CircleNodeMutableVisitor<bool>
-{
-  QuantizeBias(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
-    : input_type(input), output_type(output), granularity(gr)
-  {
-  }
-
-  loco::DataType input_type;
-  loco::DataType output_type;
-  QuantizationGranularity granularity;
-
-  // Quantize bias node
-  bool visit(luci::CircleNode *node)
-  {
-    // Check if this is already quantized
-    if (is_quantized(node))
-      return false;
-
-    auto iwo_list = get_input_weight_output_of_bias(node);
-
-    for (auto iwo : iwo_list)
-    {
-      assert(iwo.size() == 3);
-
-      auto input = loco::must_cast<luci::CircleNode *>(iwo[0]);
-      auto weight = loco::must_cast<luci::CircleNode *>(iwo[1]);
-      auto output = loco::must_cast<luci::CircleNode *>(iwo[2]);
-
-      auto const_bias = loco::must_cast<luci::CircleConst *>(node);
-      assert(const_bias->dtype() == loco::DataType::FLOAT32);
-
-      // If input is const, it is quantized here, not in QuantizeActivation
-      if (auto const_input = dynamic_cast<luci::CircleConst *>(input))
-      {
-        quant_const(const_input, output_type);
-      }
-
-      CircleConst *new_bias = nullptr;
-
-      if (granularity == QuantizationGranularity::ChannelWise)
-      {
-        auto input_q = input->quantparam();
-        assert(input_q);
-        assert(input_q->scale.size() == 1); // input scale's layer-wise
-        auto input_scale = input_q->scale[0];
-
-        assert(weight->quantparam() != nullptr); // weight scale's channel-wise
-        auto weight_scale = weight->quantparam()->scale;
-
-        uint32_t size = const_bias->size<loco::DataType::FLOAT32>();
-        assert(size == weight_scale.size());
-        std::vector<float> scaling_factor(size);
-        std::vector<int64_t> zp(size);
-
-        if (output_type == loco::DataType::U8)
-        {
-          new_bias =
-            quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
-        }
-        else if (output_type == loco::DataType::S16)
-        {
-          new_bias =
-            int16_quant_bias_per_channel(const_bias, input_scale, weight_scale, scaling_factor, zp);
-        }
-        else
-        {
-          throw std::runtime_error("Unsupported quantization type.");
-        }
-
-        auto quantparam = std::make_unique<CircleQuantParam>();
-        quantparam->scale = scaling_factor;
-        quantparam->zerop = zp;
-        assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
-        new_bias->quantparam(std::move(quantparam));
-
-        set_bias(output, new_bias);
-      }
-      else
-      {
-        auto input_q = input->quantparam();
-        assert(input_q);
-        assert(input_q->scale.size() == 1); // Only support per-layer quant
-        auto input_scale = input_q->scale[0];
-
-        auto weight_q = weight->quantparam();
-        assert(weight_q);
-        assert(weight_q->scale.size() == 1); // Only support per-layer quant
-        auto weight_scale = weight_q->scale[0];
-
-        float scaling_factor{0};
-        int64_t zp{0};
-        new_bias =
-          asym_quant_bias_per_layer(const_bias, input_scale, weight_scale, &scaling_factor, &zp);
-        auto quantparam = std::make_unique<CircleQuantParam>();
-        quantparam->scale.push_back(scaling_factor);
-        quantparam->zerop.push_back(zp);
-        assert(new_bias->quantparam() == nullptr); // bias should not be quantized before
-        new_bias->quantparam(std::move(quantparam));
-
-        set_bias(output, new_bias);
-      }
-    }
-    return false;
-  }
-};
-
-/**
- * @brief QuantizeWeights quantizes tensors for weights
- * @details Find min/max values on the fly and then quantize
- */
-struct QuantizeWeights final : public luci::CircleNodeMutableVisitor<bool>
-{
-  QuantizeWeights(loco::DataType input, loco::DataType output, QuantizationGranularity gr)
-    : input_type(input), output_type(output), granularity(gr)
-  {
-  }
-
-  loco::DataType input_type;
-  loco::DataType output_type;
-  QuantizationGranularity granularity;
-
-private:
-  void quantize_weights(luci::CircleConst *weights)
-  {
-    // Find min/max per channel-wise
-    if (granularity == QuantizationGranularity::ChannelWise)
-    {
-      auto quantparam = weights->quantparam();
-      if (quantparam == nullptr)
-      {
-        assert(false && "quantparam is nullptr");
-        return;
-      }
-
-      auto min = quantparam->min;
-      auto scaling_factor = quantparam->scale;
-      int32_t channel_dim_index = 0;
-
-      if (output_type == loco::DataType::U8)
-      {
-        asym_wquant_per_channel(weights, min, scaling_factor, channel_dim_index);
-      }
-      else
-      {
-        sym_wquant_per_channel(weights, scaling_factor, channel_dim_index);
-      }
-      quantparam->min.clear();
-      quantparam->max.clear();
-      quantparam->quantized_dimension = channel_dim_index;
-    }
-    // Find min/max per layer-wise
-    else
-    {
-      // Quantize using recorded quantparam
-      auto quantparam = weights->quantparam();
-      assert(quantparam != nullptr);
-      assert(quantparam->min.size() == 1);   // only support layer-wise quant
-      assert(quantparam->scale.size() == 1); // only support layer-wise quant
-      auto min = quantparam->min[0];
-      auto scaling_factor = quantparam->scale[0];
-      asym_wquant_per_layer(weights, min, scaling_factor);
-      quantparam->min.clear();
-      quantparam->max.clear();
-    }
-  }
-
-  bool visit(luci::CircleConv2D *node)
-  {
-    LOGGER(l);
-    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
-    auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
-    if (!is_quantized(weights))
-    {
-      auto new_weights = luci::clone(weights);
-      node->filter(new_weights);
-      quantize_weights(new_weights);
-      return true;
+      if (auto input_quant = create_in_quantize(node->inputs(i), node))
+        node->inputs(i, input_quant);
     }
-    return false;
-  }
-
-  bool visit(luci::CircleDepthwiseConv2D *node)
-  {
-    LOGGER(l);
-    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
 
-    auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
-    if (!is_quantized(weights))
-    {
-      auto new_weights = luci::clone(weights);
-      node->filter(new_weights);
-      quantize_weights(new_weights);
-      return true;
-    }
-    return false;
+    insert_out_quantize(node);
   }
 
-  bool visit(luci::CircleInstanceNorm *node)
+  // Concat has arbitrary number of inputs
+  void visit(luci::CircleConcatenation *node)
   {
-    LOGGER(l);
-    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
-    auto gamma = loco::must_cast<luci::CircleConst *>(node->gamma());
-    auto beta = loco::must_cast<luci::CircleConst *>(node->beta());
-
-    bool changed = false;
-    if (!is_quantized(gamma))
-    {
-      assert(gamma->dtype() == loco::DataType::FLOAT32);
-      auto new_gamma = luci::clone(gamma);
-      if (granularity == QuantizationGranularity::LayerWise)
-        quant_const(new_gamma, output_type);
-      else if (granularity == QuantizationGranularity::ChannelWise)
-        quant_const_per_channel(new_gamma, output_type);
-      node->gamma(new_gamma);
-      changed = true;
-    }
-    if (!is_quantized(beta))
-    {
-      assert(beta->dtype() == loco::DataType::FLOAT32);
-      auto new_beta = luci::clone(beta);
-      if (granularity == QuantizationGranularity::LayerWise)
-        quant_const(new_beta, output_type);
-      else if (granularity == QuantizationGranularity::ChannelWise)
-        quant_const_per_channel(new_beta, output_type);
-      node->beta(new_beta);
-      changed = true;
-    }
-
-    return changed;
-  }
-
-  bool visit(luci::CirclePRelu *node)
-  {
-    LOGGER(l);
-    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
-    auto alpha = loco::must_cast<luci::CircleConst *>(node->alpha());
-
-    if (!is_quantized(alpha))
+    auto arity = node->arity();
+    for (uint32_t i = 0; i < arity; i++)
     {
-      assert(alpha->dtype() == loco::DataType::FLOAT32);
-      auto new_alpha = luci::clone(alpha);
-      if (granularity == QuantizationGranularity::LayerWise)
-        quant_const(new_alpha, output_type);
-      else if (granularity == QuantizationGranularity::ChannelWise)
-        quant_const_per_channel(new_alpha, output_type);
-      node->alpha(new_alpha);
-      return true;
+      if (auto input_quant = create_in_quantize(node->values(i), node))
+        node->values(i, input_quant);
     }
 
-    return false;
+    insert_out_quantize(node);
   }
 
-  bool visit(luci::CircleTransposeConv *node)
+  // Pack has arbitrary number of inputs
+  void visit(luci::CirclePack *node)
   {
-    LOGGER(l);
-    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
-
-    auto weights = loco::must_cast<luci::CircleConst *>(node->filter());
-    if (!is_quantized(weights))
+    auto arity = node->arity();
+    for (uint32_t i = 0; i < arity; i++)
     {
-      auto new_weights = luci::clone(weights);
-      node->filter(new_weights);
-      quantize_weights(new_weights);
-      return true;
+      if (auto input_quant = create_in_quantize(node->values(i), node))
+        node->values(i, input_quant);
     }
-    return false;
-  }
-
-  bool visit(luci::CircleFullyConnected *node)
-  {
-    LOGGER(l);
-    INFO(l) << "QuantizeWeights visit node: " << node->name() << std::endl;
 
-    auto weights = loco::must_cast<luci::CircleConst *>(node->weights());
-    if (!is_quantized(weights))
-    {
-      auto new_weights = luci::clone(weights);
-      node->weights(new_weights);
-      quantize_weights(new_weights);
-      return true;
-    }
-    return false;
+    insert_out_quantize(node);
   }
 
-  bool visit(luci::CircleNode *) { return false; }
+#undef INSERT_QUANTIZE_TO_UNARY_OP
+#undef INSERT_QUANTIZE_TO_BINARY_OP
+#undef INSERT_QUANTIZE_TO_UNARY_MULTI_OUTPUT_OP
 };
 
-/** EXAMPLE
- *
- * BEFORE
- *
- *         [CircleNode]       [CircleConst]
- *           (qparam1)           (FP32)
- *                   \            /
- *                    \          /
- *                    [CirclePack]
- *                     (qparam2)
- *
- *  AFTER
- *
- *         [CircleNode]        [CircleConst]   [CircleConst] <- Dead node
- *           (qparam2)           (qparam2)         (FP32)
- *                   \            /
- *                    \          /
- *                    [CirclePack]
- *                     (qparam2)
- *
- * NOTE Quantization parameter of CirclePack (qparam2) is propagated to the inputs.
- */
-void propagate_pack_quantparam(luci::CirclePack *pack, loco::DataType quant_type)
-{
-  assert(pack->quantparam() != nullptr);
-
-  const auto num_inputs = pack->values_count();
-
-  for (uint32_t i = 0; i < num_inputs; i++)
-  {
-    auto node = loco::must_cast<luci::CircleNode *>(pack->arg(i));
-
-    // Skip if this input is PACK Op
-    if (node->opcode() == luci::CircleOpcode::PACK)
-      continue;
-
-    // Quantize constant values
-    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
-    {
-      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
-      if (const_node->dtype() != loco::DataType::FLOAT32)
-        throw std::runtime_error("Unsupported data type for constant input of pack Op");
-
-      const auto pack_qparam = pack->quantparam();
-      if (pack_qparam == nullptr)
-        throw std::runtime_error("quantparam of pack is not found during propagation");
-
-      assert(pack_qparam->scale.size() == 1);
-      assert(pack_qparam->zerop.size() == 1);
-      const auto scaling_factor = pack_qparam->scale[0];
-      const auto zerop = pack_qparam->zerop[0];
-
-      auto new_const = luci::clone(const_node);
-      quant_const_values(new_const, scaling_factor, zerop, quant_type);
-      pack->values(i, new_const);
-      overwrite_quantparam(pack, new_const);
-    }
-    else
-    {
-      const auto succs = loco::succs(node);
-      if (succs.size() > 1)
-        continue;
-
-      // Non-const input must have been quantized
-      assert(node->quantparam() != nullptr);
-      overwrite_quantparam(pack, node);
-    }
-  }
-}
-
-/**
- * @brief Quantize const input tensors using min/max of const values
- */
-void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
-{
-  auto opcode = node->opcode();
-  auto arity = node->arity();
-
-  loco::Node *input_node{nullptr};
-  luci::CircleConst *const_node{nullptr};
-
-  switch (opcode)
-  {
-    case luci::CircleOpcode::CONV_2D:
-    case luci::CircleOpcode::DEPTHWISE_CONV_2D:
-    case luci::CircleOpcode::FULLY_CONNECTED:
-    case luci::CircleOpcode::INSTANCE_NORM:
-    case luci::CircleOpcode::PRELU:
-    case luci::CircleOpcode::TRANSPOSE_CONV:
-      // Handled in QuantizeWeights and QuantizeBias
-      break;
-
-    case luci::CircleOpcode::CONCATENATION:
-      // Handled in propagate_concat_quantparam
-      break;
-
-    case luci::CircleOpcode::LOGICAL_OR:
-      // Inputs of logical Ops are bool, thus not quantized
-      break;
-
-    case luci::CircleOpcode::ARG_MAX:
-    case luci::CircleOpcode::ARG_MIN:
-    case luci::CircleOpcode::BATCH_TO_SPACE_ND:
-    case luci::CircleOpcode::LOCAL_RESPONSE_NORMALIZATION:
-    case luci::CircleOpcode::MEAN:
-    case luci::CircleOpcode::MIRROR_PAD:
-    case luci::CircleOpcode::PAD:
-    case luci::CircleOpcode::REDUCE_ANY:
-    case luci::CircleOpcode::REDUCE_PROD:
-    case luci::CircleOpcode::REDUCE_MAX:
-    case luci::CircleOpcode::REDUCE_MIN:
-    case luci::CircleOpcode::RESHAPE:
-    case luci::CircleOpcode::RESIZE_BILINEAR:
-    case luci::CircleOpcode::RESIZE_NEAREST_NEIGHBOR:
-    case luci::CircleOpcode::REVERSE_SEQUENCE:
-    case luci::CircleOpcode::SLICE:
-    case luci::CircleOpcode::SPACE_TO_BATCH_ND:
-    case luci::CircleOpcode::SPLIT_V:
-    case luci::CircleOpcode::STRIDED_SLICE:
-    case luci::CircleOpcode::SUM:
-    case luci::CircleOpcode::TILE:
-    case luci::CircleOpcode::TOPK_V2:
-    case luci::CircleOpcode::TRANSPOSE:
-      // The second input of these Ops should not be quantized
-      // Ex: axis, paddings
-      input_node = node->arg(0);
-      const_node = dynamic_cast<luci::CircleConst *>(input_node);
-      if (const_node != nullptr && !is_quantized(const_node))
-        quant_const(const_node, output_type);
-      break;
-
-    case luci::CircleOpcode::ADD:
-    case luci::CircleOpcode::ADD_N:
-    case luci::CircleOpcode::DEPTH_TO_SPACE:
-    case luci::CircleOpcode::DIV:
-    case luci::CircleOpcode::ELU:
-    case luci::CircleOpcode::EQUAL:
-    case luci::CircleOpcode::EXP:
-    case luci::CircleOpcode::FLOOR:
-    case luci::CircleOpcode::FLOOR_DIV:
-    case luci::CircleOpcode::GREATER:
-    case luci::CircleOpcode::GREATER_EQUAL:
-    case luci::CircleOpcode::LESS:
-    case luci::CircleOpcode::LESS_EQUAL:
-    case luci::CircleOpcode::LOGISTIC:
-    case luci::CircleOpcode::MAXIMUM:
-    case luci::CircleOpcode::MINIMUM:
-    case luci::CircleOpcode::MUL:
-    case luci::CircleOpcode::NOT_EQUAL:
-    case luci::CircleOpcode::POW:
-    case luci::CircleOpcode::RSQRT:
-    case luci::CircleOpcode::SOFTMAX:
-    case luci::CircleOpcode::SPACE_TO_DEPTH:
-    case luci::CircleOpcode::SQRT:
-    case luci::CircleOpcode::SUB:
-    case luci::CircleOpcode::TANH:
-    case luci::CircleOpcode::UNPACK:
-      // Quantize all const inputs using their values
-      for (uint32_t i = 0; i < arity; i++)
-      {
-        input_node = node->arg(i);
-        const_node = dynamic_cast<luci::CircleConst *>(input_node);
-        if (const_node != nullptr && !is_quantized(const_node))
-          quant_const(const_node, output_type);
-      }
-      break;
-
-    case luci::CircleOpcode::SPLIT:
-      // Only the second input is quantized
-      // First input should not be quantized (e.g., split_dim)
-      input_node = node->arg(1);
-      const_node = dynamic_cast<luci::CircleConst *>(input_node);
-      if (const_node != nullptr && !is_quantized(const_node))
-        quant_const(const_node, output_type);
-      break;
-
-    case luci::CircleOpcode::PADV2:
-      // First and third constant inputs are quantized
-      // Second input should not be quantized (e.g., paddings)
-      // Quant params are propagated either from output range to the non-constant input
-      // or from input to output and constant values
-      propagate_pad_v2_quantparam(loco::must_cast<CirclePadV2 *>(node), output_type);
-      break;
-
-    case luci::CircleOpcode::PACK:
-      // Quant param is propagated from output to inputs
-      propagate_pack_quantparam(loco::must_cast<CirclePack *>(node), output_type);
-      break;
-
-    default:
-      for (uint32_t i = 0; i < arity; i++)
-      {
-        input_node = node->arg(i);
-        const_node = dynamic_cast<luci::CircleConst *>(input_node);
-        if (const_node != nullptr)
-          throw std::runtime_error("Unsupported Op for const inputs");
-      }
-      break;
-  }
-}
-
 } // namespace
 
-/** BEFORE
- *
- *         [CircleNode]             [CircleConst]
- *         (U8 qparam1)                 (FP32)
- *                   \                    /
- *                    \                  /
- *                    [CircleConcatenation]
- *                        (U8 qparam2)
- *
- *  AFTER
- *         [CircleNode]             [CircleConst]   [CircleConst] <- Dead node
- *         (U8 qparam2)             (U8 qparam2)       (FP32)
- *                   \                    /
- *                    \                  /
- *                    [CircleConcatenation]
- *                        (U8 qparam2)
- */
-void propagate_concat_quantparam(luci::CircleConcatenation *concat, loco::DataType quant_type)
-{
-  assert(concat->quantparam() != nullptr);
-
-  const auto num_inputs = concat->numValues();
-
-  // Quantize const inputs using their values if concat has fused act function
-  if (concat->fusedActivationFunction() != luci::FusedActFunc::NONE)
-  {
-    for (uint32_t i = 0; i < num_inputs; i++)
-    {
-      auto node = concat->arg(i);
-      auto const_node = dynamic_cast<luci::CircleConst *>(node);
-      if (const_node != nullptr)
-      {
-        auto new_const = luci::clone(const_node);
-        quant_const(new_const, quant_type);
-        concat->values(i, new_const);
-      }
-    }
-    return;
-  }
-
-  for (uint32_t i = 0; i < num_inputs; i++)
-  {
-    auto node = loco::must_cast<luci::CircleNode *>(concat->arg(i));
-
-    // Skip if this input is CONCAT Op
-    if (node->opcode() == luci::CircleOpcode::CONCATENATION)
-      continue;
-
-    // Quantize constant values
-    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
-    {
-      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
-      if (const_node->dtype() != loco::DataType::FLOAT32)
-        throw std::runtime_error("Unsupported data type for constant input of concatenation Op");
-
-      const auto concat_qparam = concat->quantparam();
-      if (concat_qparam == nullptr)
-        throw std::runtime_error("quantparam of concat is not found during propagation");
-
-      assert(concat_qparam->scale.size() == 1);
-      const auto scaling_factor = concat_qparam->scale[0];
-      const auto zerop = concat_qparam->zerop[0];
-
-      auto new_const = luci::clone(const_node);
-      quant_const_values(new_const, scaling_factor, zerop, quant_type);
-      concat->values(i, new_const);
-      overwrite_quantparam(concat, new_const);
-    }
-    else
-    {
-      const auto succs = loco::succs(node);
-      if (succs.size() > 1)
-        continue;
-
-      // Non-const input must have been quantized
-      assert(node->quantparam() != nullptr);
-      overwrite_quantparam(concat, node);
-    }
-  }
-}
-
-/**
- * tells if pad_v2 quantization should ignore padding value
- * In that case padding const will be quantized with input parameters, and probably clipped
- */
-bool ignore_pad_v2_const_quantization(luci::CirclePadV2 *pad)
-{
-  // This is a workaround to quantize pad generated from MaxPoolWithArgmax operation properly
-  // TODO use metadata hints to detect this case
-  auto const_value_node = dynamic_cast<luci::CircleConst *>(pad->arg(2));
-  if (!const_value_node)
-    return false;
-  if (const_value_node->dtype() == loco::DataType::FLOAT32)
-  {
-    float const_value = const_value_node->at<loco::DataType::FLOAT32>(0);
-    if (const_value == std::numeric_limits<float>::lowest())
-      return true;
-  }
-  return false;
-}
-
-/** BEFORE
- *
- *         [CircleNode] [CircleConst] [CircleConst]
- *         (U8 qparam1)     (S32)       (FP32)
- *                   \        |         /
- *                    \       |        /
- *                      [CirclePadV2]
- *                       (U8 qparam2)
- *
- *  AFTER (case 1)
- *
- *  By default qparam is propagated from output to inputs to meet backend requirements.
- *
- *         [CircleNode] [CircleConst] [CircleConst]   [CircleConst] <- Dead node
- *         (U8 qparam2)     (S32)      (U8 qparam2)       (FP32)
- *                   \        |         /
- *                    \       |        /
- *                      [CirclePadV2]
- *                       (U8 qparam2)
- *
- *  AFTER (case 2)
- *
- * In case padded value is the lowest float value
- * Qparam is propagated from input to output and constant.
- *
- * This is a special case for optimization constructed pad, needed to guarantee that
- * extremely large negative constant do not stretch output quantization range.
- *
- *         [CircleNode] [CircleConst] [CircleConst]   [CircleConst] <- Dead node
- *         (U8 qparam1)     (S32)      (U8 qparam1)       (FP32)
- *                   \        |         /
- *                    \       |        /
- *                      [CirclePadV2]
- *                       (U8 qparam1)
- */
-void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant_type)
-{
-  if (ignore_pad_v2_const_quantization(pad_v2))
-  {
-    // propagate input quantization paramters from input to output and padding const value
-    auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
-    overwrite_quantparam(pad_v2_input, pad_v2);
-
-    auto const_value_node = loco::must_cast<luci::CircleConst *>(
-      pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS
-    auto new_const = luci::clone(const_value_node);
-
-    const auto pad_v2_input_qparam = pad_v2_input->quantparam();
-    assert(pad_v2_input_qparam != nullptr);
-    assert(pad_v2_input_qparam->scale.size() == 1);
-    const auto scaling_factor = pad_v2_input_qparam->scale.at(0);
-    const auto zerop = pad_v2_input_qparam->zerop.at(0);
-
-    quant_const_values(new_const, scaling_factor, zerop, quant_type);
-    overwrite_quantparam(pad_v2_input, new_const);
-    pad_v2->constant_values(new_const);
-    return;
-  }
-
-  // Propagate quantization paramters from output to inputs,
-  // to fit both input and counstant_value in one quant range.
-  auto quant_input = [pad_v2, quant_type](void (CirclePadV2::*arg_setter)(loco::Node *),
-                                          uint32_t arg) {
-    auto node = loco::must_cast<luci::CircleNode *>(pad_v2->arg(arg));
-
-    // Quantize constant values
-    if (node->opcode() == luci::CircleOpcode::CIRCLECONST)
-    {
-      luci::CircleConst *const_node = loco::must_cast<luci::CircleConst *>(node);
-      if (is_quantized(const_node))
-        return;
-
-      if (const_node->dtype() != loco::DataType::FLOAT32)
-        throw std::runtime_error("Unsupported data type for constant input of PadV2 Op");
-
-      const auto pad_v2_qparam = pad_v2->quantparam();
-      if (pad_v2_qparam == nullptr)
-        throw std::runtime_error("quantparam of PadV2 is not found during propagation");
-
-      assert(pad_v2_qparam->scale.size() == 1);
-      const auto scaling_factor = pad_v2_qparam->scale.at(0);
-      const auto zerop = pad_v2_qparam->zerop.at(0);
-
-      auto new_const = luci::clone(const_node);
-      quant_const_values(new_const, scaling_factor, zerop, quant_type);
-      overwrite_quantparam(pad_v2, new_const);
-      (pad_v2->*arg_setter)(new_const);
-    }
-    // Subsequent PadV2 Ops quant params are not propagated
-    else if (node->opcode() == luci::CircleOpcode::PADV2)
-    {
-      return;
-    }
-    else
-    {
-      const auto succs = loco::succs(node);
-      if (succs.size() > 1)
-        return;
-
-      // Non-const input must have been quantized
-      assert(node->quantparam() != nullptr);
-      overwrite_quantparam(pad_v2, node);
-    }
-  };
-
-  quant_input(&CirclePadV2::input, 0);
-  quant_input(&CirclePadV2::constant_values, 2);
-}
-
 void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
 {
   auto inputs = g->inputs();
   for (auto node : loco::input_nodes(g))
   {
     auto input = loco::must_cast<luci::CircleInput *>(node);
-    if (input->dtype() == _input_type)
+    if (input->dtype() == _ctx->input_type)
       continue;
 
     // Bool type is not quantizable
     if (input->dtype() == loco::DataType::BOOL)
       continue;
+    if (input->dtype() == loco::DataType::S32)
+      continue;
+    if (input->dtype() == loco::DataType::S64)
+      continue;
 
     // Insert Quantize Op
     auto quant_op = create_quantize_op(input, input->dtype());
@@ -1552,22 +367,22 @@ void QuantizeWithMinMaxPass::set_input_type(loco::Graph *g) const
       float nudged_min{0};
       float nudged_max{0};
 
-      if (_input_type == loco::DataType::U8)
+      if (_ctx->input_type == loco::DataType::U8)
       {
         compute_asym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
       }
       else
       {
-        assert(_input_type == loco::DataType::S16);
+        assert(_ctx->input_type == loco::DataType::S16);
         compute_sym_scale_zp(min, max, scaling_factor, zp, nudged_min, nudged_max);
       }
-      input->dtype(_input_type);
+      input->dtype(_ctx->input_type);
       input->quantparam()->scale[0] = scaling_factor;
       input->quantparam()->zerop[0] = zp;
     }
 
     auto graph_input = inputs->at(input->index());
-    graph_input->dtype(_input_type);
+    graph_input->dtype(_ctx->input_type);
   }
 }
 
@@ -1577,7 +392,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
   for (auto node : loco::output_nodes(g))
   {
     auto output = loco::must_cast<luci::CircleOutput *>(node);
-    if (output->dtype() == _output_type)
+    if (output->dtype() == _ctx->output_type)
       continue;
 
     // Bool type is not quantizable
@@ -1591,7 +406,7 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
       continue;
 
     // Insert Quantize Op
-    auto quant_op = create_quantize_op(from, _output_type);
+    auto quant_op = create_quantize_op(from, _ctx->output_type);
     loco::replace(from).with(quant_op);
     quant_op->input(from);
 
@@ -1599,67 +414,165 @@ void QuantizeWithMinMaxPass::set_output_type(loco::Graph *g) const
     luci::add_origin(quant_op, luci::get_origin(from));
 
     auto graph_output = outputs->at(output->index());
-    graph_output->dtype(_output_type);
+    graph_output->dtype(_ctx->output_type);
   }
 }
 
+/**
+ * How QuantizeWithMinMax works?
+ *
+ * We categorized tensors into four groups
+ * - Activation: Feature maps (both Const/Non-const)
+ * - Weights: Const tensors of specific Ops (Conv, FC, ...)
+ * - Bias: Const tensors of specific Ops (Conv, FC, ...)
+ * - Others: padding value, one_hot value, axis, ..
+ *
+ * Activation is quantized in different ways
+ * 1. For non-constant activation, quantize using recorded min/max
+ * 2. For constant activation, quantize using min/max of its value
+ * 3. For some Ops (ex: pad_v2), output qparam is used as input qparam (backward propagation)
+ * 4. For some Ops (ex: reshape), input qparam is used as output qparam (forward propagation)
+ * 5. For some Ops (ex: tanh), output qparam has pre-defined values
+ *
+ * Weights is quantized using min/max of its value
+ *
+ * Bias is quantized using input scale (s_i) and weights scale (s_w)
+ * - Activation and weights should be quantized earlier than bias
+ *
+ * Quantization Steps
+ * 1. Quantize Activation
+ *   - Quantize using recorded min/max (QuantizeActivation)
+ *   - Insert Quantize Ops for mixed-precision quantization (InsertQuantizeOp)
+ *   - Remove redundant Quantize Ops (RemoveRedundantQuantizePass)
+ *   - Propagate qparam backward (PropagateQParamBackwardPass)
+ *   - Quantize const inputs (QuantizeConstInputActivation)
+ *   - Quantize using pre-defined values (QuantizeSpecialActivation)
+ *   - Propagate qparam forward (PropagateQParamForwardPass)
+ * 2. Quantize Weights
+ * 3. Quantize Bias
+ * 4. Set input dtype
+ * 5. Set output dtype
+ *
+ * Why quantization sequence was determined as above?
+ * - Activation and weights should be quantized before bias (1->2->3). Input/Output
+ *   dtype can be updated at the end (4->5).
+ * - During activation quantization,
+ *   - Backward propagation is performed earlier than forward propagation. This allows
+ *     backward-propagated qpram to be overwritten during forward propagation.
+ *     We made this decision as Ops for forward propagation (reshape, transpose, ..)
+ *     are more common than backward propagation. TODO Check this decision is safe.
+ *   - QuantizeSpecialActivation is called before forward propagation to make sure that
+ *     the pre-defined qparam values are propagated.
+ */
 bool QuantizeWithMinMaxPass::run(loco::Graph *g)
 {
   LOGGER(l);
   INFO(l) << "QuantizeWithMinMaxPass Start" << std::endl;
 
+  auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+  auto quantize_dtype = [&](const luci::CircleNode *node) {
+    auto iter = info_by_name.find(node->name());
+
+    // Return designated quantization dtype
+    if (iter != info_by_name.end())
+      return iter->second.dtype;
+
+    // Return default quantization dtype
+    return _ctx->output_model_dtype;
+  };
+
+  auto quantize_granularity = [&](const luci::CircleNode *node) {
+    auto iter = info_by_name.find(node->name());
+
+    // Return designated quantization granularity
+    if (iter != info_by_name.end())
+      return iter->second.granularity;
+
+    // Return default quantization granularity
+    return _ctx->granularity;
+  };
+
   // Quantize activation
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeActivation qa(_input_model_dtype, _output_model_dtype);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    QuantizeActivation qa(_ctx->input_model_dtype, quantize_dtype(circle_node));
     circle_node->accept(&qa);
   }
 
-  // Quantize weights
+  // Insert Quantize Op
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    circle_node->accept(&qw);
+    auto op_dtype = quantize_dtype(circle_node);
+    if (op_dtype != _ctx->output_model_dtype)
+    {
+      InsertQuantizeOp iqo(_ctx->output_model_dtype, op_dtype);
+      circle_node->accept(&iqo);
+    }
   }
 
-  // Quantize bias
+  // Remove redundant Quantize Op
+  {
+    logo::Phase phase;
+
+    phase.emplace_back(std::make_unique<luci::RemoveRedundantQuantizePass>());
+
+    ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+    logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+    phase_runner.attach(&prog);
+    phase_runner.run(phase);
+  }
+
+  // Backward propagation of activation qparam
+  {
+    PropagateQParamBackwardPass pqbp(_ctx->output_model_dtype);
+    pqbp.run(g);
+  }
+
+  // Quantize const input activation
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeBias qb(_input_model_dtype, _output_model_dtype, _granularity);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    circle_node->accept(&qb);
+    QuantizeConstInputActivation qcia(quantize_dtype(circle_node));
+    circle_node->accept(&qcia);
   }
 
-  // Propagate quantization parameters of concat Op
+  // Update qparam of output of special Ops
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    auto concat = dynamic_cast<luci::CircleConcatenation *>(node);
-    if (not concat)
-      continue;
-
-    // Propagate qparam of concat to its inputs if
-    // (1) concat is uint8-quantized
-    // (2) concat has no fused activation function
-    // (3) the input is not concatenation Op
-    // (4) the input is not produced to Ops other than concat
-    propagate_concat_quantparam(concat, _output_model_dtype);
+    auto circle_node = loco::must_cast<luci::CircleNode *>(node);
+    QuantizeSpecialActivation qsa(_ctx->input_model_dtype, quantize_dtype(circle_node));
+    circle_node->accept(&qsa);
   }
 
-  // Quantize const inputs other than weights and bias
+  // Forward propagation of activation qparam
+  logo::Phase phase;
+
+  phase.emplace_back(std::make_unique<luci::PropagateQParamForwardPass>(_ctx->TF_style_maxpool));
+
+  ProgressReporter prog(g, logo::PhaseStrategy::Saturate);
+  logo::PhaseRunner<logo::PhaseStrategy::Saturate> phase_runner{g};
+  phase_runner.attach(&prog);
+  phase_runner.run(phase);
+
+  // Quantize weights
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    quantize_const_inputs(circle_node, _output_model_dtype);
+    QuantizeWeights qw(_ctx->input_model_dtype, quantize_dtype(circle_node),
+                       quantize_granularity(circle_node));
+    circle_node->accept(&qw);
   }
 
-  // Update qparam of output of special Ops
+  // Quantize bias
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeSpecialActivation qsa(_input_model_dtype, _output_model_dtype);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    circle_node->accept(&qsa);
+    QuantizeBias qb(_ctx->input_model_dtype, quantize_dtype(circle_node),
+                    quantize_granularity(circle_node));
+    circle_node->accept(&qb);
   }
 
   // Update output dtype
@@ -1667,11 +580,11 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
   for (auto node : loco::output_nodes(g))
   {
     auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
-    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_model_dtype)
+    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _ctx->output_model_dtype)
     {
-      circle_node->dtype(_output_model_dtype);
+      circle_node->dtype(_ctx->output_model_dtype);
       auto graph_output = graph_outputs->at(circle_node->index());
-      graph_output->dtype(_output_model_dtype);
+      graph_output->dtype(_ctx->output_model_dtype);
     }
   }
 
index 75ec0cfd86c6940ad0b8faebfbe3dff9459dbcb3..d5fa21ffd603313a579270bb13f9149ae793754c 100644 (file)
 
 #include "luci/Pass/QuantizeWithMinMaxPass.h"
 
+#include <luci/IR/CircleNodes.h>
+
 #include <gtest/gtest.h>
 
+class SimpleConcatGraph
+{
+public:
+  SimpleConcatGraph(loco::DataType quant_type)
+  {
+    concat_node = g.nodes()->create<luci::CircleConcatenation>(2);
+    input_1 = g.nodes()->create<luci::CircleConst>();
+    input_2 = g.nodes()->create<luci::CircleConst>();
+
+    concat_node->dtype(quant_type);
+    concat_node->fusedActivationFunction(luci::FusedActFunc::NONE);
+    input_1->dtype(quant_type);
+    input_2->dtype(quant_type);
+
+    concat_node->values(0, input_1);
+    concat_node->values(1, input_2);
+  }
+
+  ~SimpleConcatGraph()
+  {
+    concat_node->values(0, nullptr);
+    concat_node->values(1, nullptr);
+  }
+
+public:
+  loco::Graph g;
+  luci::CircleConcatenation *concat_node = nullptr;
+  luci::CircleConst *input_1 = nullptr;
+  luci::CircleConst *input_2 = nullptr;
+};
+
 TEST(QuantizeWithMinMaxPassTest, name)
 {
   luci::QuantizeWithMinMaxPass pass(loco::DataType::FLOAT32, loco::DataType::U8,
@@ -25,3 +58,19 @@ TEST(QuantizeWithMinMaxPassTest, name)
   auto const name = pass.name();
   ASSERT_NE(nullptr, name);
 }
+
+// Test concat of integer tensors
+// Integer tensors are not quantized
+TEST(QuantizeWithMinMaxPassTest, int_concat)
+{
+  SimpleConcatGraph g(loco::DataType::S32);
+
+  luci::QuantizeWithMinMaxPass qwmm(loco::DataType::FLOAT32, loco::DataType::U8,
+                                    luci::QuantizationGranularity::LayerWise);
+
+  qwmm.run(&g.g);
+
+  EXPECT_EQ(nullptr, g.concat_node->quantparam());
+  EXPECT_EQ(nullptr, g.input_1->quantparam());
+  EXPECT_EQ(nullptr, g.input_2->quantparam());
+}
index f02301ed1edf367be8dd38586e5d43d9def35489..684d5d48a7df15212548acf4755cef2cb07a01bc 100644 (file)
 
 #include "QuantizedModelVerifier.h"
 
-#include "VerifyQuantizedNodeLayerWiseGranularity.h"
-#include "VerifyQuantizedNodeChannelWiseGranularity.h"
-#include "VerifyQuantizedNodeU8Type.h"
-#include "VerifyQuantizedNodeS16Type.h"
+#include "VerifyQuantizedNodeGranularity.h"
+#include "VerifyQuantizedNodeType.h"
+#include "VerifyQuantizedBiasScale.h"
+#include "helpers/LayerInfoMap.h"
 
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
@@ -28,12 +28,33 @@ namespace luci
 
 void QuantizedModelVerifier::verify(loco::Graph *g)
 {
-  if (_quantized_dtype != Type::U8 && _quantized_dtype != Type::S16)
-    throw std::runtime_error("Unsupported quantized dtype");
-
-  if (_granularity != Granularity::ChannelWise && _granularity != Granularity::LayerWise)
+  if (_ctx->granularity != Granularity::ChannelWise && _ctx->granularity != Granularity::LayerWise)
     throw std::runtime_error("Unsupported granularity");
 
+  auto info_by_name = layer_info_map(g, _ctx->layers_info);
+
+  auto quantize_dtype = [&](const luci::CircleNode *node) {
+    auto iter = info_by_name.find(node->name());
+
+    // Return designated quantization dtype
+    if (iter != info_by_name.end())
+      return iter->second.dtype;
+
+    // Return default quantization dtype
+    return _ctx->output_model_dtype;
+  };
+
+  auto quantize_granularity = [&](const luci::CircleNode *node) {
+    auto iter = info_by_name.find(node->name());
+
+    // Return designated quantization granularity
+    if (iter != info_by_name.end())
+      return iter->second.granularity;
+
+    // Return default quantization granularity
+    return _ctx->granularity;
+  };
+
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
@@ -46,32 +67,17 @@ void QuantizedModelVerifier::verify(loco::Graph *g)
     };
 
     // Verify Type
-    if (_quantized_dtype == Type::U8)
-    {
-      VerifyQuantizedNodeU8Type vt;
-      if (!circle_node->accept(&vt))
-        throw std::runtime_error("Wrong data type detected in " + node_name());
-    }
-    else if (_quantized_dtype == Type::S16)
-    {
-      VerifyQuantizedNodeS16Type vt;
-      if (!circle_node->accept(&vt))
-        throw std::runtime_error("Wrong data type detected in " + node_name());
-    }
+    if (!VerifyQuantizedNodeType::create(quantize_dtype(circle_node))->verify(circle_node))
+      throw std::runtime_error("Wrong data type detected in " + node_name());
 
     // Verify Granularity
-    if (_granularity == Granularity::LayerWise)
-    {
-      VerifyQuantizedNodeLayerWiseGranularity vg;
-      if (!circle_node->accept(&vg))
-        throw std::runtime_error("Wrong granularity detected in " + node_name());
-    }
-    else if (_granularity == Granularity::ChannelWise)
-    {
-      VerifyQuantizedNodeChannelWiseGranularity vg;
-      if (!circle_node->accept(&vg))
-        throw std::runtime_error("Wrong granularity detected in " + node_name());
-    }
+    if (!circle_node->accept(
+          VerifyQuantizedNodeGranularity::create(quantize_granularity(circle_node)).get()))
+      throw std::runtime_error("Wrong granularity detected in " + node_name());
+
+    // Verify Bias scale
+    if (!VerifyQuantizedBiasScale::create()->verify(circle_node))
+      throw std::runtime_error("Wrong bias scale detected in " + node_name());
   }
 }
 
index d5fbb8e7473ee606cc36157102bb1d7df72a5853..7409a51d7aa495f4e7bb309274d55e5e0ec5df5c 100644 (file)
@@ -21,6 +21,8 @@
 
 #include <loco.h>
 
+#include <memory>
+
 namespace luci
 {
 
@@ -31,18 +33,40 @@ namespace luci
  */
 struct QuantizedModelVerifier
 {
+public:
+  struct Context
+  {
+    loco::DataType output_model_dtype = loco::DataType::Unknown;
+    QuantizationGranularity granularity = QuantizationGranularity::ChannelWise;
+    loco::DataType input_type = loco::DataType::Unknown;
+    loco::DataType output_type = loco::DataType::Unknown;
+    bool TF_style_maxpool = false;
+    std::vector<LayerInfo> layers_info;
+  };
 
 public:
   QuantizedModelVerifier(loco::DataType quantized_dtype, QuantizationGranularity granularity)
-    : _quantized_dtype(quantized_dtype), _granularity(granularity)
   {
+    _ctx = std::make_unique<Context>();
+    {
+      _ctx->output_model_dtype = quantized_dtype;
+      _ctx->granularity = granularity;
+      _ctx->input_type = quantized_dtype;
+      _ctx->output_type = quantized_dtype;
+      _ctx->TF_style_maxpool = false;
+    }
+  }
+
+public:
+  QuantizedModelVerifier(std::unique_ptr<Context> &&ctx) : _ctx{std::move(ctx)}
+  {
+    // DO NOTHING
   }
 
   void verify(loco::Graph *g);
 
 private:
-  loco::DataType _quantized_dtype;
-  QuantizationGranularity _granularity;
+  std::unique_ptr<Context> _ctx;
 };
 
 } // namespace luci
index 3a6d86c331a147c974ad67fac5f72e553eb8f624..cebafd32b4b5cbac48a9a33adc71fa519d4dad10 100644 (file)
@@ -17,6 +17,7 @@
 #include "QuantizedModelVerifier.h"
 
 #include "luci/Pass/QuantizeWithMinMaxPass.h"
+#include "luci/Pass/QuantizationParameters.h"
 
 #include <luci/test/TestIOGraph.h>
 
@@ -112,57 +113,77 @@ void quantize_and_verify(loco::Graph *g, Type quantized_dtype, Granularity granu
   verifier.verify(g);
 }
 
-// Helper function to reduce duplicate test codes
-// Assumption: g->output()->from() is the target node
-void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
-                                         Granularity granularity, Type wrong_dtype)
+void quantize_and_verify_with_layer_info(loco::Graph *g, Type quantized_dtype,
+                                         Granularity granularity)
 {
-  luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
-  pass.run(g->g());
-
-  auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
-  node->dtype(wrong_dtype);
+  // A layer named "test" has dtype different from quantized_dtype
+  luci::LayerInfo info;
+  {
+    info.name = "test";
+    // dtype is different from quantized_dtype
+    info.dtype = quantized_dtype == Type::U8 ? Type::S16 : Type::U8;
+    info.granularity = Granularity::ChannelWise;
+  }
 
-  luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
-  verifier.verify(g->g());
-}
+  // Do quantization
+  {
+    auto ctx = std::make_unique<luci::QuantizeWithMinMaxPass::Context>();
+    {
+      ctx->input_model_dtype = Type::FLOAT32;
+      ctx->output_model_dtype = quantized_dtype;
+      ctx->granularity = granularity;
+      ctx->input_type = quantized_dtype;
+      ctx->output_type = quantized_dtype;
+      ctx->TF_style_maxpool = false;
+      ctx->layers_info.push_back(info);
+    }
 
-void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
-                                         Granularity granularity, Type wrong_dtype,
-                                         luci::CircleNode *target)
-{
-  luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
-  pass.run(g->g());
+    luci::QuantizeWithMinMaxPass pass(std::move(ctx));
+    pass.run(g);
+  }
 
-  target->dtype(wrong_dtype);
+  // Do verification
+  {
+    auto ctx = std::make_unique<luci::QuantizedModelVerifier::Context>();
+    {
+      ctx->output_model_dtype = quantized_dtype;
+      ctx->granularity = granularity;
+      ctx->input_type = quantized_dtype;
+      ctx->output_type = quantized_dtype;
+      ctx->TF_style_maxpool = false;
+      ctx->layers_info.push_back(info);
+    }
 
-  luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
-  verifier.verify(g->g());
+    luci::QuantizedModelVerifier verifier(std::move(ctx));
+    verifier.verify(g);
+  }
 }
 
 // Helper function to reduce duplicate test codes
 // Assumption: g->output()->from() is the target node
-void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
-                                                Granularity granularity)
+void quantize_and_verify_with_wrong_type(luci::test::TestIOGraph *g, Type quantized_dtype,
+                                         Granularity granularity, Type wrong_dtype)
 {
   luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
   pass.run(g->g());
 
   auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
-  insert_scale_zp(node, 1.0, 1);
+  node->dtype(wrong_dtype);
 
   luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
   verifier.verify(g->g());
 }
 
 // Helper function to reduce duplicate test codes
+// Assumption: g->output()->from() is the target node
 void quantize_and_verify_with_wrong_granularity(luci::test::TestIOGraph *g, Type quantized_dtype,
-                                                Granularity granularity, luci::CircleNode *target)
+                                                Granularity granularity)
 {
   luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, quantized_dtype, granularity);
   pass.run(g->g());
 
-  insert_scale_zp(target, 1.0, 1);
+  auto node = loco::must_cast<luci::CircleNode *>(g->output()->from());
+  insert_scale_zp(node, 1.0, 1);
 
   luci::QuantizedModelVerifier verifier(quantized_dtype, granularity);
   verifier.verify(g->g());
@@ -230,6 +251,8 @@ public:
       _instnorm->input(input());
       _instnorm->gamma(_gamma);
       _instnorm->beta(_beta);
+      _instnorm->fusedActivationFunction(luci::FusedActFunc::NONE);
+      _instnorm->name("test");
     }
     output()->from(_instnorm);
 
@@ -256,6 +279,7 @@ public:
     _logistic = g()->nodes()->create<luci::CircleLogistic>();
     {
       _logistic->x(input());
+      _logistic->name("test");
     }
     output()->from(_logistic);
 
@@ -275,6 +299,7 @@ public:
     _lrn = g()->nodes()->create<luci::CircleLocalResponseNormalization>();
     {
       _lrn->input(input());
+      _lrn->name("test");
     }
     output()->from(_lrn);
 
@@ -295,6 +320,7 @@ public:
     {
       _softmax->logits(input());
       _softmax->beta(0.1);
+      _softmax->name("test");
     }
     output()->from(_softmax);
 
@@ -324,6 +350,7 @@ public:
       _stob->input(input());
       _stob->block_shape(_block_shape);
       _stob->paddings(_paddings);
+      _stob->name("test");
     }
     output()->from(_stob);
 
@@ -346,6 +373,7 @@ public:
     {
       _stod->input(input());
       _stod->block_size(2);
+      _stod->name("test");
     }
     output()->from(_stod);
 
@@ -375,6 +403,7 @@ public:
       _slice->input(input());
       _slice->begin(_begin);
       _slice->size(_size);
+      _slice->name("test");
     }
     output()->from(_slice);
 
@@ -472,6 +501,7 @@ public:
       _slice->begin(_begin);
       _slice->end(_end);
       _slice->strides(_strides);
+      _slice->name("test");
     }
     output()->from(_slice);
 
@@ -499,6 +529,7 @@ public:
     {
       _reshape->tensor(input());
       _reshape->shape(_shape);
+      _reshape->name("test");
     }
     output()->from(_reshape);
 
@@ -519,6 +550,7 @@ public:
     _tanh = g()->nodes()->create<luci::CircleTanh>();
     {
       _tanh->x(input());
+      _tanh->name("test");
     }
     output()->from(_tanh);
 
@@ -538,6 +570,7 @@ public:
     _floor = g()->nodes()->create<luci::CircleFloor>();
     {
       _floor->x(input());
+      _floor->name("test");
     }
     output()->from(_floor);
 
@@ -601,6 +634,7 @@ public:
       _btos->input(input());
       _btos->block_shape(_block_shape);
       _btos->crops(_crops);
+      _btos->name("test");
     }
     output()->from(_btos);
 
@@ -623,6 +657,7 @@ public:
     {
       _dtos->input(input());
       _dtos->block_size(2);
+      _dtos->name("test");
     }
     output()->from(_dtos);
 
@@ -645,6 +680,7 @@ public:
       _pack->values(0, input());
       _pack->values(1, _param);
       _pack->axis(0);
+      _pack->name("test");
     }
     output()->from(_pack);
 
@@ -680,6 +716,7 @@ public:
     {
       _pad->input(input());
       _pad->paddings(_paddings);
+      _pad->name("test");
     }
     output()->from(_pad);
 
@@ -707,6 +744,7 @@ public:
       _pad->input(input());
       _pad->paddings(_paddings);
       _pad->constant_values(_constant_values);
+      _pad->name("test");
     }
     output()->from(_pad);
 
@@ -735,6 +773,7 @@ public:
       _mirror_pad->input(input());
       _mirror_pad->paddings(_paddings);
       _mirror_pad->mode(luci::MirrorPadMode::REFLECT);
+      _mirror_pad->name("test");
     }
     output()->from(_mirror_pad);
 
@@ -761,6 +800,7 @@ public:
     {
       _transpose->a(input());
       _transpose->perm(_perm);
+      _transpose->name("test");
     }
     output()->from(_transpose);
 
@@ -784,6 +824,8 @@ public:
       _concat->values(0, input());
       _concat->values(1, _param);
       _concat->axis(0);
+      _concat->fusedActivationFunction(luci::FusedActFunc::NONE);
+      _concat->name("test");
     }
     output()->from(_concat);
 
@@ -795,6 +837,54 @@ private:
   luci::CircleConst *_param = nullptr;
 };
 
+template <Type indexT> class OneHotTestGraph final : public SimpleTestGraph
+{
+public:
+  void init(void) override
+  {
+    TestIOGraph::init({32}, {32, 10});
+    {
+      // input dtype is float by default, but OneHot's input should have indexType (s32/s64)
+      input()->dtype(indexT);
+    }
+
+    _depth = g()->nodes()->template create<luci::CircleConst>();
+    {
+      _depth->dtype(loco::DataType::S32);
+    }
+
+    _on_value = g()->nodes()->template create<luci::CircleConst>();
+    {
+      _on_value->dtype(loco::DataType::FLOAT32);
+    }
+
+    _off_value = g()->nodes()->template create<luci::CircleConst>();
+    {
+      _off_value->dtype(loco::DataType::FLOAT32);
+    }
+
+    _one_hot = g()->nodes()->template create<luci::CircleOneHot>();
+    {
+      _one_hot->indices(input());
+      _one_hot->depth(_depth);
+      _one_hot->on_value(_on_value);
+      _one_hot->off_value(_off_value);
+      _one_hot->axis(-1);
+      _one_hot->dtype(loco::DataType::FLOAT32);
+      _one_hot->name("test");
+    }
+    output()->from(_one_hot);
+
+    set_minmax_to_non_const(g(), -1, 1);
+  }
+
+private:
+  luci::CircleOneHot *_one_hot = nullptr;
+  luci::CircleConst *_depth = nullptr;
+  luci::CircleConst *_on_value = nullptr;
+  luci::CircleConst *_off_value = nullptr;
+};
+
 // Test graph for comparison Ops
 // GREATER, GREATER_EQUAL, LESS, LESS_EQUAL, EQUAL, NOT_EQUAL
 template <class Op> class ComparisonOpTestGraph final : public SimpleTestGraph
@@ -866,6 +956,7 @@ public:
     {
       _div->x(input());
       _div->y(_const);
+      _div->name("test");
     }
     output()->from(_div);
 
@@ -893,6 +984,7 @@ public:
     {
       _floor_div->x(input());
       _floor_div->y(_const);
+      _floor_div->name("test");
     }
     output()->from(_floor_div);
 
@@ -917,6 +1009,7 @@ public:
     _rsqrt = g()->nodes()->create<luci::CircleRsqrt>();
     {
       _rsqrt->x(input());
+      _rsqrt->name("test");
     }
     output()->from(_rsqrt);
 
@@ -936,6 +1029,7 @@ public:
     _sqrt = g()->nodes()->create<luci::CircleSqrt>();
     {
       _sqrt->x(input());
+      _sqrt->name("test");
     }
     output()->from(_sqrt);
 
@@ -955,6 +1049,7 @@ public:
     _elu = g()->nodes()->create<luci::CircleElu>();
     {
       _elu->features(input());
+      _elu->name("test");
     }
     output()->from(_elu);
 
@@ -977,6 +1072,7 @@ public:
     {
       _pow->x(input());
       _pow->y(_const);
+      _pow->name("test");
     }
     output()->from(_pow);
 
@@ -1004,6 +1100,7 @@ public:
     {
       _resize_bilinear->input(input());
       _resize_bilinear->size(_size);
+      _resize_bilinear->name("test");
     }
     output()->from(_resize_bilinear);
 
@@ -1027,6 +1124,7 @@ public:
     {
       _resize_nearest_neighbor->input(input());
       _resize_nearest_neighbor->size(_size);
+      _resize_nearest_neighbor->name("test");
     }
     output()->from(_resize_nearest_neighbor);
 
@@ -1067,6 +1165,62 @@ private:
   luci::CircleConst *_unpack_dim = nullptr;
 };
 
+class MulTestGraph final : public SimpleTestGraph
+{
+public:
+  void init(void) override
+  {
+    TestIOGraph::init({32}, {32});
+
+    _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+    _mul = g()->nodes()->create<luci::CircleMul>();
+    {
+      _mul->x(input());
+      _mul->y(_const);
+      _mul->fusedActivationFunction(luci::FusedActFunc::NONE);
+      _mul->name("test");
+    }
+    output()->from(_mul);
+
+    set_minmax_to_non_const(g(), -1, 1);
+  }
+
+  loco::Node *x() { return _mul->x(); }
+  loco::Node *y() { return _mul->y(); }
+
+private:
+  luci::CircleMul *_mul = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
+class AddTestGraph final : public SimpleTestGraph
+{
+public:
+  void init(void) override
+  {
+    TestIOGraph::init({32}, {32});
+
+    _const = create_dummy_const<Type::FLOAT32>(g(), {32});
+    _add = g()->nodes()->create<luci::CircleAdd>();
+    {
+      _add->x(input());
+      _add->y(_const);
+      _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+      _add->name("test");
+    }
+    output()->from(_add);
+
+    set_minmax_to_non_const(g(), -1, 1);
+  }
+
+  loco::Node *x() { return _add->x(); }
+  loco::Node *y() { return _add->y(); }
+
+private:
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_const = nullptr;
+};
+
 } // namespace
 
 // Quantize and verify with given configurations
@@ -1078,6 +1232,15 @@ private:
     EXPECT_NO_THROW(quantize_and_verify(g.g(), type, granularity)); \
   } while (0)
 
+// Quantize and verify with layer info
+#define TEST_WITH_LAYER_INFO(graph, type, granularity)                              \
+  do                                                                                \
+  {                                                                                 \
+    graph g;                                                                        \
+    g.init();                                                                       \
+    EXPECT_NO_THROW(quantize_and_verify_with_layer_info(g.g(), type, granularity)); \
+  } while (0)
+
 // Quantize and verify with wrong type
 #define TEST_WITH_WRONG_TYPE(graph, type, granularity, wrong_dtype)                            \
   do                                                                                           \
@@ -1098,25 +1261,34 @@ private:
 
 // Quantize and verify with wrong type
 // Users can specify the test target
-#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target)    \
-  do                                                                                  \
-  {                                                                                   \
-    graph g;                                                                          \
-    g.init();                                                                         \
-    auto node = loco::must_cast<luci::CircleNode *>(target);                          \
-    EXPECT_ANY_THROW(                                                                 \
-      quantize_and_verify_with_wrong_type(&g, type, granularity, wrong_dtype, node)); \
+#define TEST_WITH_WRONG_TYPE_TARGET(graph, type, granularity, wrong_dtype, target) \
+  do                                                                               \
+  {                                                                                \
+    graph g;                                                                       \
+    g.init();                                                                      \
+    auto node = loco::must_cast<luci::CircleNode *>(target);                       \
+    luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity);           \
+    pass.run(g.g());                                                               \
+    auto after_node = loco::must_cast<luci::CircleNode *>(target);                 \
+    after_node->dtype(wrong_dtype);                                                \
+    luci::QuantizedModelVerifier verifier(type, granularity);                      \
+    EXPECT_ANY_THROW(verifier.verify(g.g()));                                      \
   } while (0)
 
 // Quantize and verify with wrong granularity
 // Users can specify the test target
-#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target)                   \
-  do                                                                                           \
-  {                                                                                            \
-    graph g;                                                                                   \
-    g.init();                                                                                  \
-    auto node = loco::must_cast<luci::CircleNode *>(target);                                   \
-    EXPECT_ANY_THROW(quantize_and_verify_with_wrong_granularity(&g, type, granularity, node)); \
+#define TEST_WITH_WRONG_GRANULARITY_TARGET(graph, type, granularity, target) \
+  do                                                                         \
+  {                                                                          \
+    graph g;                                                                 \
+    g.init();                                                                \
+    auto node = loco::must_cast<luci::CircleNode *>(target);                 \
+    luci::QuantizeWithMinMaxPass pass(Type::FLOAT32, type, granularity);     \
+    pass.run(g.g());                                                         \
+    auto after_node = loco::must_cast<luci::CircleNode *>(target);           \
+    insert_scale_zp(after_node, 1.0, 1);                                     \
+    luci::QuantizedModelVerifier verifier(type, granularity);                \
+    EXPECT_ANY_THROW(verifier.verify(g.g()));                                \
   } while (0)
 
 // Test a local helper function
@@ -1145,6 +1317,10 @@ TEST(QuantizedModelVerifierTest, InstanceNorm)
   TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(InstanceNormTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1169,6 +1345,10 @@ TEST(QuantizedModelVerifierTest, LocalResponseNormalization)
   TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(LocalResponseNormalizationTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1199,6 +1379,10 @@ TEST(QuantizedModelVerifierTest, Logistic)
   TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(LogisticTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1223,6 +1407,10 @@ TEST(QuantizedModelVerifierTest, Softmax)
   TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(SoftmaxTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1247,6 +1435,10 @@ TEST(QuantizedModelVerifierTest, SpaceToBatchND)
   TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(SpaceToBatchNDTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1271,6 +1463,10 @@ TEST(QuantizedModelVerifierTest, SpaceToDepth)
   TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(SpaceToDepthTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1299,6 +1495,14 @@ TEST(QuantizedModelVerifierTest, Slice)
   TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(SliceTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1379,6 +1583,10 @@ TEST(QuantizedModelVerifierTest, StridedSlice)
   TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(StridedSliceTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1463,6 +1671,10 @@ TEST(QuantizedModelVerifierTest, BatchToSpaceND)
   TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(BatchToSpaceNDTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1487,6 +1699,10 @@ TEST(QuantizedModelVerifierTest, DepthToSpace)
   TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(DepthToSpaceTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1511,6 +1727,10 @@ TEST(QuantizedModelVerifierTest, Concatenation)
   TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(ConcatenationTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1557,6 +1777,10 @@ TEST(QuantizedModelVerifierTest, Reshape)
   TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(ReshapeTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1581,6 +1805,10 @@ TEST(QuantizedModelVerifierTest, Tanh)
   TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(TanhTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(TanhTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(TanhTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(TanhTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1606,6 +1834,10 @@ TEST(QuantizedModelVerifierTest, Pack)
   TEST_WITH_GRAPH(PackTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(PackTestGraph, Type::S16, Granularity::ChannelWise);
 
+  TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(PackTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(PackTestGraph, Type::S16, Granularity::ChannelWise);
+
   // Test if Pack's qparam is propagated to the input
   {
     PackTestGraph g;
@@ -1640,6 +1872,10 @@ TEST(QuantizedModelVerifierTest, Pad)
   TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(PadTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(PadTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(PadTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(PadTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1664,6 +1900,10 @@ TEST(QuantizedModelVerifierTest, PadV2)
   TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(PadV2TestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1688,6 +1928,10 @@ TEST(QuantizedModelVerifierTest, MirrorPad)
   TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(MirrorPadTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1712,6 +1956,10 @@ TEST(QuantizedModelVerifierTest, Transpose)
   TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(TransposeTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1736,6 +1984,10 @@ TEST(QuantizedModelVerifierTest, Floor)
   TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(FloorTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(FloorTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(FloorTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(FloorTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1869,11 +2121,59 @@ TEST(QuantizedModelVerifierTest, NotEqual_wrong_granularity_NEG)
   SUCCEED();
 }
 
+TEST(QuantizedModelVerifierTest, OneHot)
+{
+  TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot_wrong_input_type_NEG)
+{
+  TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise, Type::U8);
+
+  TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise, Type::U8);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, OneHot_wrong_granularity_NEG)
+{
+  TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S32>, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::LayerWise);
+  TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_WRONG_GRANULARITY(OneHotTestGraph<Type::S64>, Type::S16, Granularity::ChannelWise);
+  SUCCEED();
+}
+
 TEST(QuantizedModelVerifierTest, Div)
 {
   TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(DivTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(DivTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(DivTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(DivTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1902,6 +2202,10 @@ TEST(QuantizedModelVerifierTest, FloorDiv)
   TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(FloorDivTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1930,6 +2234,10 @@ TEST(QuantizedModelVerifierTest, Rsqrt)
   TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(RsqrtTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1954,6 +2262,10 @@ TEST(QuantizedModelVerifierTest, Sqrt)
   TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(SqrtTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -1978,6 +2290,10 @@ TEST(QuantizedModelVerifierTest, Elu)
   TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(EluTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(EluTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(EluTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(EluTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -2002,6 +2318,10 @@ TEST(QuantizedModelVerifierTest, Pow)
   TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(PowTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(PowTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(PowTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(PowTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -2030,6 +2350,10 @@ TEST(QuantizedModelVerifierTest, ResizeBilinear)
   TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(ResizeBilinearTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -2054,6 +2378,10 @@ TEST(QuantizedModelVerifierTest, ResizeNearestNeighbor)
   TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
   TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
   TEST_WITH_GRAPH(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(ResizeNearestNeighborTestGraph, Type::S16, Granularity::ChannelWise);
   SUCCEED();
 }
 
@@ -2099,6 +2427,93 @@ TEST(QuantizedModelVerifierTest, Unpack_wrong_granularity_NEG)
   SUCCEED();
 }
 
+TEST(QuantizedModelVerifierTest, Add)
+{
+  TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(AddTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(AddTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(AddTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(AddTestGraph, Type::S16, Granularity::ChannelWise);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_wrong_type_NEG)
+{
+  TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(AddTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(AddTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Add_wrong_granularity_NEG)
+{
+  TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.x());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+  TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::LayerWise, g.y());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(AddTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul)
+{
+  TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(MulTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(MulTestGraph, Type::S16, Granularity::ChannelWise);
+
+  TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_LAYER_INFO(MulTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_LAYER_INFO(MulTestGraph, Type::S16, Granularity::ChannelWise);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_wrong_type_NEG)
+{
+  TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(MulTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(MulTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, Mul_wrong_granularity_NEG)
+{
+  TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.x());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.x());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.x());
+
+  TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::LayerWise, g.y());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::U8, Granularity::ChannelWise, g.y());
+  TEST_WITH_WRONG_GRANULARITY_TARGET(MulTestGraph, Type::S16, Granularity::ChannelWise, g.y());
+  SUCCEED();
+}
+
+// TODO Add following testcases
+//
+// CircleConv2D
+//
+// CircleDepthwiseConv2D
+//
+// CirclePRelu
+//
+// CircleTransposeConv
+//
+// CircleFullyConnected
+//
+// CircleAveragePool2D
+//
+// CircleMaxPool2D
+//
+// CircleMean
+//
+// CircleRelu
+//
+// CircleCast
+//
+
 #undef TEST_WITH_GRAPH
 #undef TEST_WITH_WRONG_TYPE
 #undef TEST_WITH_WRONG_GRANULARITY
diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.cpp
new file mode 100644 (file)
index 0000000..8a10ad4
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+
+#include <luci/IR/CircleNode.h>
+
+/**
+ *  Remove redundant quantize operations. For subsequent Quantize Ops,
+ *  only the last Quantize Op is valid, so we can remove the rest of the Quantize Op.
+ *
+ *  BEFORE
+ *                                          [CircleNode_1]
+ *                                                |
+ *                             [CircleQuantize, dtype_1, scale_1, zero_point_1]
+ *                                                |
+ *                             [CircleQuantize, dtype_2, scale_2, zero_point_2]
+ *                                                |
+ *                                         [CircleNode_2]
+ *
+ *  AFTER
+ *                                          [CircleNode_1]
+ *                                         /              \
+ *                                      /                    \
+ *                                   /                          \
+ *                                /                                \
+ *                             /                                      \
+ * [CircleQuantize, dtype_2, scale_2, zero_point_2] [CircleQuantize, dtype_1, scale_1, zero_point_1]
+ *                          |
+ *                   [CircleNode_2]
+ *
+ */
+
+namespace
+{
+
+bool remove_redundant_quantize(luci::CircleQuantize *node)
+{
+  auto pred_node = loco::must_cast<luci::CircleNode *>(node->input());
+
+  if (node->quantparam() == nullptr or pred_node->quantparam() == nullptr)
+    return false;
+
+  if (node->quantparam()->scale.size() != 1 or node->quantparam()->zerop.size() != 1 or
+      pred_node->quantparam()->scale.size() != 1 or pred_node->quantparam()->zerop.size() != 1)
+  {
+    return false;
+  }
+
+  if (node->dtype() != pred_node->dtype() or
+      pred_node->quantparam()->scale.at(0) != node->quantparam()->scale.at(0) or
+      pred_node->quantparam()->zerop.at(0) != node->quantparam()->zerop.at(0))
+  {
+    return false;
+  }
+
+  replace(node).with(pred_node);
+
+  return true;
+}
+
+bool remove_redundant_subsequent_quantize(luci::CircleQuantize *node)
+{
+  auto pred_node = dynamic_cast<luci::CircleQuantize *>(node->input());
+  if (pred_node == nullptr)
+    return remove_redundant_quantize(node);
+
+  node->input(pred_node->input());
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool RemoveRedundantQuantizePass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
+  {
+    if (auto quantize_node = dynamic_cast<luci::CircleQuantize *>(node))
+    {
+      if (remove_redundant_subsequent_quantize(quantize_node))
+        changed = true;
+    }
+  }
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp b/compiler/luci/pass/src/RemoveRedundantQuantizePass.test.cpp
new file mode 100644 (file)
index 0000000..d0166bd
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/RemoveRedundantQuantizePass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+class QuantizeGraphlet
+{
+public:
+  QuantizeGraphlet() = default;
+
+public:
+  void init(loco::Graph *g)
+  {
+    _first_quantize = g->nodes()->create<luci::CircleQuantize>();
+    _first_quantize->dtype(loco::DataType::U8);
+    {
+      auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+      quantize_param->scale = {0.5};
+      quantize_param->zerop = {0};
+      _first_quantize->quantparam(std::move(quantize_param));
+    }
+    _first_quantize->name("first_quantize");
+
+    _second_quantize = g->nodes()->create<luci::CircleQuantize>();
+    _second_quantize->dtype(loco::DataType::U8);
+    {
+      auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+      quantize_param->scale = {0.5};
+      quantize_param->zerop = {0};
+      _second_quantize->quantparam(std::move(quantize_param));
+    }
+    _second_quantize->name("second_quantize");
+  }
+
+protected:
+  luci::CircleQuantize *_first_quantize = nullptr;
+  luci::CircleQuantize *_second_quantize = nullptr;
+};
+
+class RedundantSubsequentQuantizeGraph : public TestIOGraph, public QuantizeGraphlet
+{
+public:
+  RedundantSubsequentQuantizeGraph() = default;
+
+public:
+  void init(void)
+  {
+    TestIOGraph::init({1}, {1});
+    QuantizeGraphlet::init(g());
+
+    input()->dtype(loco::DataType::U8);
+    {
+      auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+      quantize_param->scale = {1};
+      quantize_param->zerop = {1};
+      input()->quantparam(std::move(quantize_param));
+    }
+
+    _first_quantize->input(input());
+    _second_quantize->input(_first_quantize);
+
+    output()->from(_second_quantize);
+    output()->dtype(loco::DataType::U8);
+  }
+};
+
+class RedundantQuantizeGraph : public TestIOGraph, public QuantizeGraphlet
+{
+public:
+  RedundantQuantizeGraph() = default;
+
+public:
+  void init(void)
+  {
+    TestIOGraph::init({1}, {1});
+    QuantizeGraphlet::init(g());
+
+    input()->dtype(loco::DataType::U8);
+    {
+      auto quantize_param = std::make_unique<luci::CircleQuantParam>();
+      quantize_param->scale = {0.5};
+      quantize_param->zerop = {0};
+      input()->quantparam(std::move(quantize_param));
+    }
+
+    _first_quantize->input(input());
+
+    output()->from(_first_quantize);
+    output()->dtype(loco::DataType::U8);
+  }
+};
+
+} // namespace
+
+TEST(RemoveRedundantQuantizePass, name)
+{
+  luci::RemoveRedundantQuantizePass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST(RemoveRedundantQuantizePass, remove_subsequent_quantize)
+{
+  RedundantSubsequentQuantizeGraph g;
+  luci::RemoveRedundantQuantizePass pass;
+
+  g.init();
+
+  EXPECT_TRUE(pass.run(g.g()));
+
+  int count = 0;
+  for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+  {
+    if (dynamic_cast<luci::CircleQuantize *>(node))
+    {
+      count++;
+    }
+  }
+
+  ASSERT_EQ(1, count);
+}
+
+TEST(RemoveRedundantQuantizePass, remove_quantize)
+{
+  RedundantQuantizeGraph g;
+  luci::RemoveRedundantQuantizePass pass;
+
+  g.init();
+
+  EXPECT_TRUE(pass.run(g.g()));
+
+  int count = 0;
+  for (auto node : loco::active_nodes(loco::output_nodes(g.g())))
+  {
+    if (dynamic_cast<luci::CircleQuantize *>(node))
+    {
+      count++;
+    }
+  }
+
+  ASSERT_EQ(0, count);
+}
index 71c51ecdaa639ecb85669a454d9a90930228a663..75cf72795c613a644afee982c4ecca09eca78d3b 100644 (file)
@@ -71,7 +71,7 @@ bool remove_consecutive_transpose_function(luci::CircleTranspose *target_node)
     for (uint32_t i = 0; i < pred_perm->size<loco::DataType::S32>(); i++)
     {
       new_const_node->at<loco::DataType::S32>(i) =
-        target_perm->at<loco::DataType::S32>(pred_perm->at<loco::DataType::S32>(i));
+        pred_perm->at<loco::DataType::S32>(target_perm->at<loco::DataType::S32>(i));
     }
     new_const_node->name(name + "/Transpose/perm");
 
index e8062349907ffa64e0f5bfa4920da19d0a77ab8a..bb8e292d406c062cef165c8a90dcb4a2157af778 100644 (file)
@@ -271,6 +271,31 @@ TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type2)
   ASSERT_EQ(2, perm->at<loco::DataType::S32>(3));
 }
 
+TEST(RemoveRedundantTransposePass, remove_consecutive_transpose_function_type3)
+{
+  auto graph = loco::make_graph();
+  create_redundunt_transpose(graph.get(), {0, 3, 2, 1}, {0, 2, 3, 1});
+
+  luci::RemoveRedundantTransposePass pass;
+  while (pass.run(graph.get()))
+    ;
+  luci::CircleTranspose *transpose_node = nullptr;
+  for (auto node : loco::active_nodes(loco::output_nodes(graph.get())))
+  {
+    auto trans = dynamic_cast<luci::CircleTranspose *>(node);
+    if (not trans)
+      continue;
+    transpose_node = trans;
+    break;
+  }
+  ASSERT_NE(nullptr, transpose_node);
+  auto perm = loco::must_cast<luci::CircleConst *>(transpose_node->perm());
+  ASSERT_EQ(0, perm->at<loco::DataType::S32>(0));
+  ASSERT_EQ(2, perm->at<loco::DataType::S32>(1));
+  ASSERT_EQ(1, perm->at<loco::DataType::S32>(2));
+  ASSERT_EQ(3, perm->at<loco::DataType::S32>(3));
+}
+
 /**
  * @brief Test case that first transpose output become input of operations more than one.
  */
index 3f0c4ee829c3b7ee7bc49ff1a9e5c162807b02a3..fb46f490de715d407149a4c6fb79a6615f28f59c 100644 (file)
@@ -58,6 +58,25 @@ bool remove_no_effect_reshape(luci::CircleNode *node)
 namespace luci
 {
 
+/**
+ * BEFORE
+ *      [CircleNode]
+ *            |
+ *     [CircleReshape]
+ *            |
+ *      [CircleNode]
+ *
+ * AFTER
+ *      [CircleNode]
+ *            |  \
+ *            |  [CircleReshape]
+ *            |
+ *      [CircleNode]
+ *
+ * NOTE
+ *     This pass will remove Reshape when input and output has same shape
+ */
+
 bool RemoveUnnecessaryReshapePass::run(loco::Graph *g)
 {
   bool changed = false;
index a0cc0194fb665dfa3ca7edcdc6972dfddb874416..bca0a94830bc143cad02bc77944ab13d661a22bd 100644 (file)
@@ -26,8 +26,17 @@ namespace
 
 luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma)
 {
-  assert(gamma->rank() == 1);
-  auto channel_size = gamma->dim(0).value();
+  assert(gamma->rank() == 1 or gamma->rank() == 4);
+
+  uint32_t channel_idx = gamma->rank() - 1;
+  uint32_t channel_size = gamma->dim(channel_idx).value();
+
+  // Gamma should be broadcastable in the channel direction
+  for (uint32_t i = 0; i < gamma->rank(); i++)
+  {
+    if (i != channel_idx)
+      assert(gamma->dim(i).value() == 1); // FIX is_batchnorm_mul UNLESS
+  }
 
   auto name = gamma->name();
   assert(name.length() > 0);
@@ -53,8 +62,17 @@ luci::CircleConst *create_weights_from_gamma(luci::CircleConst *gamma)
 
 luci::CircleConst *create_bias_from_beta(luci::CircleConst *beta)
 {
-  assert(beta->rank() == 1);
-  auto channel_size = beta->dim(0).value();
+  assert(beta->rank() == 1 or beta->rank() == 4);
+
+  uint32_t channel_idx = beta->rank() - 1;
+  uint32_t channel_size = beta->dim(channel_idx).value();
+
+  // Beta should be broadcastable in the channel direction
+  for (uint32_t i = 0; i < beta->rank(); i++)
+  {
+    if (i != channel_idx)
+      assert(beta->dim(i).value() == 1); // FIX is_batchnorm_add UNLESS
+  }
 
   auto name = beta->name();
   assert(name.length() > 0);
index 903d4dcc993673c32d85c563f887814e45d59de4..bac033112c86e2925a6f1779b57c462b49a18929 100644 (file)
@@ -141,6 +141,37 @@ TEST(ReplaceMulAddWithDepthwiseConv, simple)
   }
 }
 
+TEST(ReplaceMulAddWithDepthwiseConv, simple_rank4)
+{
+  SimpleGraph g;
+
+  const uint32_t channel_size = 16;
+  g.gamma->shape({1, 1, 1, channel_size});
+  g.beta->shape({1, 1, 1, channel_size});
+
+  luci::ReplaceMulAddWithDepthwiseConvPass pass;
+  while (pass.run(&g.g))
+    ;
+
+  auto dwconv = dynamic_cast<luci::CircleDepthwiseConv2D *>(g.output->from());
+  EXPECT_NE(nullptr, dwconv);
+
+  auto weights = dynamic_cast<luci::CircleConst *>(dwconv->filter());
+  auto bias = dynamic_cast<luci::CircleConst *>(dwconv->bias());
+  EXPECT_NE(nullptr, weights);
+  EXPECT_EQ(4, weights->rank());
+  EXPECT_EQ(channel_size, weights->dim(3).value());
+  EXPECT_NE(nullptr, bias);
+  EXPECT_EQ(1, bias->rank());
+  EXPECT_EQ(channel_size, bias->dim(0).value());
+
+  for (int i = 0; i < channel_size; i++)
+  {
+    EXPECT_FLOAT_EQ(i, weights->at<loco::DataType::FLOAT32>(i));
+    EXPECT_FLOAT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+  }
+}
+
 TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG)
 {
   SimpleGraph g;
@@ -154,3 +185,18 @@ TEST(ReplaceMulAddWithDepthwiseConv, wrong_op_NEG)
 
   EXPECT_EQ(false, changed);
 }
+
+TEST(ReplaceMulAddWithDepthwiseConv, rank3_NEG)
+{
+  SimpleGraph g;
+
+  g.input->shape({4, 4, 16});
+  g.mul->shape({4, 4, 16});
+  g.add->shape({4, 4, 16});
+  g.output->shape({4, 4, 16});
+
+  luci::ReplaceMulAddWithDepthwiseConvPass pass;
+  auto changed = pass.run(&g.g);
+
+  EXPECT_EQ(false, changed);
+}
index 9cba9a9e719e0b33468b2cbda44dfb16a1ca7cb5..57c386d990b0f6ab37a8775072eb3bf9e195de91 100644 (file)
 namespace
 {
 
-void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
-{
-  auto q = src->quantparam();
-  if (q == nullptr)
-    dst->quantparam(nullptr);
-  else
-    dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
-}
-
 // SplitV is substituted to Split if the contents of size_splits are all same
 // For example,
 // size_splits = [32, 32] -> substitute
@@ -67,7 +58,7 @@ bool resolve_splitv(luci::CircleSplitV *sv)
   split_node->split_dim(sv->split_dim());
   split_node->num_split(sv->num_split());
   split_node->name(sv->name());
-  copy_quantparam(split_node, sv);
+  copy_quantparam(sv, split_node);
   luci::add_origin(split_node, luci::get_origin(sv));
 
   auto succs = loco::succs(sv);
@@ -78,7 +69,7 @@ bool resolve_splitv(luci::CircleSplitV *sv)
     so_node->input(split_node);
     so_node->index(svo->index());
     so_node->name(svo->name());
-    copy_quantparam(so_node, svo);
+    copy_quantparam(svo, so_node);
     luci::add_origin(so_node, luci::get_origin(svo));
 
     replace(svo).with(so_node);
index f487637824600757d5cb218e861f1f983ca32400..df7266df950431da3cfc918a58e73d263faf02ad 100644 (file)
@@ -75,18 +75,6 @@ std::vector<uint32_t> node_shape(const luci::CircleNode *input)
   return shape;
 }
 
-/**
- * @brief copy quantparam of src to dst
- */
-void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
-{
-  auto q = src->quantparam();
-  if (q == nullptr)
-    dst->quantparam(nullptr);
-  else
-    dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
-}
-
 /**
  * @brief return CircleConst ptr with values of new_shape
  */
@@ -142,7 +130,7 @@ bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
   auto graph = squeeze->graph();
   auto reshape = graph->nodes()->create<luci::CircleReshape>();
   auto shape_const = create_shape_const(graph, reshape_shape);
-  copy_quantparam(reshape, squeeze);
+  copy_quantparam(squeeze, reshape);
   reshape->name(name + "/Reshape");
   luci::add_origin(reshape, luci::get_origin(squeeze));
   shape_const->name(name + "/Reshape/shape");
index f50f2f54f22e634afcd126b010ad600f3e5083cf..9e1c5a4a3441506935086192408a0c747ab5ffd1 100644 (file)
@@ -124,7 +124,7 @@ bool substitute_strided_slice_to_reshape(luci::CircleStridedSlice *ss_node)
   std::bitset<32> end_mask(ss_node->end_mask());
   std::bitset<32> shrink_axis_mask(ss_node->shrink_axis_mask());
 
-  uint input_rank = input_node->rank();
+  uint32_t input_rank = input_node->rank();
   for (uint32_t i = 0; i < input_rank; i++)
   {
     if (!input_node->dim(i).known())
diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp b/compiler/luci/pass/src/VerifyQuantizedBiasScale.cpp
new file mode 100644 (file)
index 0000000..e65d576
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedBiasScale.h"
+
+#include <cmath>
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+  if (not(ARG))                  \
+  {                              \
+    return false;                \
+  }
+
+namespace
+{
+
+bool same(float a, float b)
+{
+  constexpr float epsilon = 1e-10;
+  return std::fabs(a - b) < epsilon;
+}
+
+// Check bias scale = input scale * weight scale
+// This function checks both LWQ and CWQ
+bool check_bias_scale(const loco::Node *input, const loco::Node *weights, const loco::Node *bias)
+{
+  auto input_node = loco::must_cast<const luci::CircleNode *>(input);
+  auto input_qparam = input_node->quantparam();
+  RETURN_FALSE_UNLESS(input_qparam != nullptr);
+
+  auto weights_node = loco::must_cast<const luci::CircleNode *>(weights);
+  auto weights_qparam = weights_node->quantparam();
+  RETURN_FALSE_UNLESS(weights_qparam != nullptr);
+
+  auto bias_node = loco::must_cast<const luci::CircleNode *>(bias);
+  auto bias_qparam = bias_node->quantparam();
+  RETURN_FALSE_UNLESS(bias_qparam != nullptr);
+
+  RETURN_FALSE_UNLESS(input_qparam->scale.size() == 1);
+  RETURN_FALSE_UNLESS(weights_qparam->scale.size() == bias_qparam->scale.size());
+
+  auto input_scale = input_qparam->scale[0];
+  for (uint32_t i = 0; i < weights_qparam->scale.size(); i++)
+  {
+    auto weights_scale = weights_qparam->scale[i];
+    auto bias_scale = bias_qparam->scale[i];
+    RETURN_FALSE_UNLESS(same(bias_scale, input_scale * weights_scale));
+  }
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleConv2D *node)
+{
+  RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias()));
+  return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleDepthwiseConv2D *node)
+{
+  RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->filter(), node->bias()));
+  return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleFullyConnected *node)
+{
+  luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+  if (bias != nullptr)
+  {
+    RETURN_FALSE_UNLESS(check_bias_scale(node->input(), node->weights(), node->bias()));
+  }
+  return true;
+}
+
+bool VerifyQuantizedBiasScale::visit(const luci::CircleTransposeConv *node)
+{
+  luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+  if (bias != nullptr)
+  {
+    RETURN_FALSE_UNLESS(check_bias_scale(node->outBackprop(), node->filter(), node->bias()));
+  }
+  return true;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/VerifyQuantizedBiasScale.h b/compiler/luci/pass/src/VerifyQuantizedBiasScale.h
new file mode 100644 (file)
index 0000000..b41f78e
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
+#define __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+
+namespace luci
+{
+
+/**
+ * @brief Verify the scale of quantized bias node
+ * @details
+ *
+ * Bias of CONV, DCONV, TCONV, FC layers should meet the following condition.
+ *
+ * bias scale = input scale * weights scale
+ */
+class VerifyQuantizedBiasScale : public luci::CircleNodeVisitor<bool>
+{
+public:
+  static std::shared_ptr<VerifyQuantizedBiasScale> create()
+  {
+    return std::make_shared<VerifyQuantizedBiasScale>();
+  };
+
+public:
+  bool verify(luci::CircleNode *node) { return node->accept(this); }
+
+private:
+  // Operators with bias
+  bool visit(const luci::CircleConv2D *node);
+  bool visit(const luci::CircleDepthwiseConv2D *node);
+  bool visit(const luci::CircleFullyConnected *node);
+  bool visit(const luci::CircleTransposeConv *node);
+
+  bool visit(const luci::CircleNode *) { return true; }
+};
+
+} // namespace luci
+
+#endif // __LUCI_VERIFY_QUANTIZED_BIAS_SCALE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h
deleted file mode 100644 (file)
index bf3ff2e..0000000
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Pass/QuantizationParameters.h>
-
-using Granularity = luci::QuantizationGranularity;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
-  if (not(ARG))                  \
-  {                              \
-    return false;                \
-  }
-
-namespace luci
-{
-
-/**
- * @brief Verify the granualrity of channel-wise quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeChannelWiseGranularity final : public luci::CircleNodeVisitor<bool>
-{
-private:
-  bool is_lwq(const loco::Node *node)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
-
-    if (circle_node->quantparam() == nullptr)
-      return false;
-
-    if (circle_node->quantparam()->scale.size() != 1)
-      return false;
-
-    if (circle_node->quantparam()->zerop.size() != 1)
-      return false;
-
-    return true;
-  }
-
-  uint32_t rank(const loco::Node *node)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
-    return circle_node->rank();
-  }
-
-  bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
-
-    assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
-    auto channel_size = circle_node->dim(channel_dim).value();
-
-    if (circle_node->quantparam() == nullptr)
-      return false;
-
-    if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
-      return false;
-
-    if (circle_node->quantparam()->scale.size() != channel_size)
-      return false;
-
-    if (circle_node->quantparam()->zerop.size() != channel_size)
-      return false;
-
-    return true;
-  }
-
-private:
-  bool visit(const luci::CircleConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
-    return true;
-  }
-
-  bool visit(const luci::CircleConcatenation *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    for (uint32_t i = 0; i < node->numValues(); i++)
-    {
-      RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
-    }
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthToSpace *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthwiseConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
-    return true;
-  }
-
-  bool visit(const luci::CircleInstanceNorm *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
-    return true;
-  }
-
-  bool visit(const luci::CirclePack *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    for (uint32_t i = 0; i < node->values_count(); i++)
-    {
-      RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
-    }
-    return true;
-  }
-
-  bool visit(const luci::CirclePad *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    return true;
-  }
-
-  bool visit(const luci::CirclePadV2 *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq(node->constant_values()))
-    return true;
-  }
-
-  bool visit(const luci::CircleMirrorPad *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    return true;
-  }
-
-  bool visit(const luci::CirclePRelu *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
-    return true;
-  }
-
-  bool visit(const luci::CircleTransposeConv *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
-
-    return true;
-  }
-
-  bool visit(const luci::CircleFullyConnected *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    // Bias is optional (it can be CircleOutputExclude)
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
-    return true;
-  }
-
-  bool visit(const luci::CircleAdd *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleAveragePool2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->value()));
-    return true;
-  }
-
-  bool visit(const luci::CircleLogicalOr *)
-  {
-    // Logical OR has bool-type inputs and output
-    // Nothing to be checked
-    return true;
-  }
-
-  bool visit(const luci::CircleMaxPool2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->value()));
-    return true;
-  }
-
-  bool visit(const luci::CircleLocalResponseNormalization *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleMean *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleMul *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleNotEqual *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleRelu *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->features()));
-    return true;
-  }
-
-  bool visit(const luci::CircleReshape *node)
-  {
-    auto input = loco::must_cast<const luci::CircleNode *>(node->tensor());
-    bool input_quantized = input->quantparam() != nullptr;
-    bool node_quantized = node->quantparam() != nullptr;
-    RETURN_FALSE_UNLESS(input_quantized == node_quantized);
-    RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node))
-    RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
-    return true;
-  }
-
-  bool visit(const luci::CircleLogistic *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSoftmax *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->logits()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToBatchND *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToDepth *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSlice *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplit *node)
-  {
-    // node's output is the input of CircleSplitOut, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitOut *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitV *node)
-  {
-    // node's output is the input of CircleSplitVOut, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitVOut *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    return true;
-  }
-
-  bool visit(const luci::CircleStridedSlice *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleArgMax *node)
-  {
-    // node's output is index, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleBatchToSpaceND *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleTanh *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleTranspose *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->a()));
-    return true;
-  }
-
-  bool visit(const luci::CircleFloor *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleGreater *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleGreaterEqual *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleDiv *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleFloorDiv *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleRsqrt *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSqrt *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleElu *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->features()));
-    return true;
-  }
-
-  bool visit(const luci::CirclePow *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeBilinear *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeNearestNeighbor *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpack *node)
-  {
-    // node's output is the input of CircleUnpackOut, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->value()));
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpackOut *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    return true;
-  }
-
-  bool visit(const luci::CircleCast *node)
-  {
-    auto input = loco::must_cast<const luci::CircleNode *>(node->x());
-    bool input_quantized = input->quantparam() != nullptr;
-    bool node_quantized = node->quantparam() != nullptr;
-    RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
-    RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node));
-    return true;
-  }
-
-  // TODO: Implement more Ops
-
-  bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUANTIZED_NODE_CHANNELWISE_GRANULARITY_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.cpp
new file mode 100644 (file)
index 0000000..8697090
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedNodeGranularity.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <memory>
+
+namespace luci
+{
+
+std::shared_ptr<VerifyQuantizedNodeGranularity>
+VerifyQuantizedNodeGranularity::create(Granularity granularity)
+{
+  if (granularity == Granularity::ChannelWise)
+    return std::make_shared<VerifyQuantizedNodeChannelWiseGranularity>();
+  else if (granularity == Granularity::LayerWise)
+    return std::make_shared<VerifyQuantizedNodeLayerWiseGranularity>();
+  else
+    throw std::domain_error("Not supported Granularity type");
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeGranularity.h
new file mode 100644 (file)
index 0000000..442183c
--- /dev/null
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <memory>
+
+using Granularity = luci::QuantizationGranularity;
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+  if (not(ARG))                  \
+  {                              \
+    return false;                \
+  }
+
+namespace luci
+{
+
+/**
+ * @brief Verify the granularity of quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+class VerifyQuantizedNodeGranularity : public luci::CircleNodeVisitor<bool>
+{
+public:
+  static std::shared_ptr<VerifyQuantizedNodeGranularity> create(Granularity granularity);
+
+protected:
+  bool is_lwq(const loco::Node *node)
+  {
+    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+
+    if (circle_node->quantparam() == nullptr)
+      return false;
+
+    if (circle_node->quantparam()->scale.size() != 1)
+      return false;
+
+    if (circle_node->quantparam()->zerop.size() != 1)
+      return false;
+
+    return true;
+  }
+
+private:
+  virtual bool visit(const luci::CircleConv2D *node) = 0;
+
+  bool visit(const luci::CircleConcatenation *node)
+  {
+    // Skip granularity check for concatenation of indices
+    if (node->dtype() == loco::DataType::S32 or node->dtype() == loco::DataType::S64)
+      return true;
+
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    for (uint32_t i = 0; i < node->numValues(); i++)
+    {
+      RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+    }
+    return true;
+  }
+
+  bool visit(const luci::CircleDepthToSpace *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    return true;
+  }
+
+  virtual bool visit(const luci::CircleDepthwiseConv2D *node) = 0;
+
+  virtual bool visit(const luci::CircleInstanceNorm *node) = 0;
+
+  bool visit(const luci::CirclePack *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    for (uint32_t i = 0; i < node->values_count(); i++)
+    {
+      RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
+    }
+    return true;
+  }
+
+  bool visit(const luci::CirclePad *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    return true;
+  }
+
+  bool visit(const luci::CirclePadV2 *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_lwq(node->constant_values()))
+    return true;
+  }
+
+  bool visit(const luci::CircleMirrorPad *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    return true;
+  }
+
+  virtual bool visit(const luci::CirclePRelu *node) = 0;
+
+  virtual bool visit(const luci::CircleTransposeConv *node) = 0;
+
+  virtual bool visit(const luci::CircleFullyConnected *node) = 0;
+
+  bool visit(const luci::CircleAdd *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleAveragePool2D *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->value()));
+    return true;
+  }
+
+  bool visit(const luci::CircleLogicalOr *)
+  {
+    // Logical OR has bool-type inputs and output
+    // Nothing to be checked
+    return true;
+  }
+
+  bool visit(const luci::CircleMaxPool2D *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->value()));
+    return true;
+  }
+
+  bool visit(const luci::CircleLocalResponseNormalization *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleMean *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleMul *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleNotEqual *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleOneHot *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->off_value()));
+    RETURN_FALSE_UNLESS(is_lwq(node->on_value()));
+    return true;
+  }
+
+  bool visit(const luci::CircleRelu *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->features()));
+    return true;
+  }
+
+  bool visit(const luci::CircleReshape *node)
+  {
+    auto input = loco::must_cast<const luci::CircleNode *>(node->tensor());
+    bool input_quantized = input->quantparam() != nullptr;
+    bool node_quantized = node->quantparam() != nullptr;
+    RETURN_FALSE_UNLESS(input_quantized == node_quantized);
+    RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node))
+    RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
+    return true;
+  }
+
+  bool visit(const luci::CircleLogistic *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSoftmax *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->logits()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSpaceToBatchND *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSpaceToDepth *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSlice *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSplit *node)
+  {
+    // node's output is the input of CircleSplitOut, thus not quantized
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitOut *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitV *node)
+  {
+    // node's output is the input of CircleSplitVOut, thus not quantized
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitVOut *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    return true;
+  }
+
+  bool visit(const luci::CircleStridedSlice *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleArgMax *node)
+  {
+    // node's output is index, thus not quantized
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleBatchToSpaceND *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleTanh *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    return true;
+  }
+
+  bool visit(const luci::CircleTranspose *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->a()));
+    return true;
+  }
+
+  bool visit(const luci::CircleFloor *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    return true;
+  }
+
+  bool visit(const luci::CircleGreater *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleGreaterEqual *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleDiv *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleFloorDiv *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleRsqrt *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSqrt *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    return true;
+  }
+
+  bool visit(const luci::CircleElu *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->features()));
+    return true;
+  }
+
+  bool visit(const luci::CirclePow *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->x()));
+    RETURN_FALSE_UNLESS(is_lwq(node->y()));
+    return true;
+  }
+
+  bool visit(const luci::CircleResizeBilinear *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleResizeNearestNeighbor *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleUnpack *node)
+  {
+    // node's output is the input of CircleUnpackOut, thus not quantized
+    RETURN_FALSE_UNLESS(is_lwq(node->value()));
+    return true;
+  }
+
+  bool visit(const luci::CircleUnpackOut *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    return true;
+  }
+
+  bool visit(const luci::CircleCast *node)
+  {
+    auto input = loco::must_cast<const luci::CircleNode *>(node->x());
+    bool input_quantized = input->quantparam() != nullptr;
+    bool node_quantized = node->quantparam() != nullptr;
+    RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
+    RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node));
+    return true;
+  }
+
+  // TODO: Implement more Ops
+
+  bool visit(const luci::CircleNode *) { return true; }
+};
+
+class VerifyQuantizedNodeChannelWiseGranularity final : public VerifyQuantizedNodeGranularity
+{
+private:
+  uint32_t rank(const loco::Node *node)
+  {
+    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+    return circle_node->rank();
+  }
+
+  bool is_cwq_const(const loco::Node *node, uint32_t channel_dim)
+  {
+    auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+    assert(channel_dim < circle_node->rank()); // FIX_CALLER_UNLESS
+    auto channel_size = circle_node->dim(channel_dim).value();
+
+    if (circle_node->quantparam() == nullptr)
+      return false;
+
+    if (circle_node->quantparam()->quantized_dimension != static_cast<int32_t>(channel_dim))
+      return false;
+
+    if (circle_node->quantparam()->scale.size() != channel_size)
+      return false;
+
+    if (circle_node->quantparam()->zerop.size() != channel_size)
+      return false;
+
+    return true;
+  }
+
+private:
+  bool visit(const luci::CircleConv2D *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+    return true;
+  }
+
+  bool visit(const luci::CircleDepthwiseConv2D *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 3))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+    return true;
+  }
+
+  bool visit(const luci::CircleInstanceNorm *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->gamma(), rank(node->gamma()) - 1))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->beta(), rank(node->beta()) - 1))
+    return true;
+  }
+
+  bool visit(const luci::CirclePRelu *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->alpha(), rank(node->alpha()) - 1))
+    return true;
+  }
+
+  bool visit(const luci::CircleTransposeConv *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->filter(), 0))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+
+    return true;
+  }
+
+  bool visit(const luci::CircleFullyConnected *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_cwq_const(node->weights(), 0))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    // Bias is optional (it can be CircleOutputExclude)
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_cwq_const(node->bias(), rank(node->bias()) - 1))
+    return true;
+  }
+};
+
+class VerifyQuantizedNodeLayerWiseGranularity final : public VerifyQuantizedNodeGranularity
+{
+private:
+  bool is_lwq_const(const loco::Node *node)
+  {
+    auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
+
+    if (circle_node->quantparam() == nullptr)
+      return false;
+
+    if (circle_node->quantparam()->scale.size() != 1)
+      return false;
+
+    if (circle_node->quantparam()->zerop.size() != 1)
+      return false;
+
+    return true;
+  }
+
+private:
+  bool visit(const luci::CircleConv2D *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+    return true;
+  }
+
+  bool visit(const luci::CircleDepthwiseConv2D *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+    return true;
+  }
+
+  bool visit(const luci::CircleInstanceNorm *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
+    return true;
+  }
+
+  bool visit(const luci::CirclePRelu *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
+    return true;
+  }
+
+  bool visit(const luci::CircleTransposeConv *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+    return true;
+  }
+
+  bool visit(const luci::CircleFullyConnected *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node))
+    RETURN_FALSE_UNLESS(is_lwq(node->input()))
+    RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
+    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+    if (bias != nullptr)
+      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
+    return true;
+  }
+};
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_GRANULARITY_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h b/compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
deleted file mode 100644 (file)
index 9bc8b31..0000000
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Pass/QuantizationParameters.h>
-
-using Granularity = luci::QuantizationGranularity;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
-  if (not(ARG))                  \
-  {                              \
-    return false;                \
-  }
-
-namespace luci
-{
-
-/**
- * @brief Verify the granualrity of layer-wise quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeLayerWiseGranularity final : public luci::CircleNodeVisitor<bool>
-{
-private:
-  bool is_lwq(const loco::Node *node)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
-
-    if (circle_node->quantparam() == nullptr)
-      return false;
-
-    if (circle_node->quantparam()->scale.size() != 1)
-      return false;
-
-    if (circle_node->quantparam()->zerop.size() != 1)
-      return false;
-
-    return true;
-  }
-
-  bool is_lwq_const(const loco::Node *node)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleConst *>(node);
-
-    if (circle_node->quantparam() == nullptr)
-      return false;
-
-    if (circle_node->quantparam()->scale.size() != 1)
-      return false;
-
-    if (circle_node->quantparam()->zerop.size() != 1)
-      return false;
-
-    return true;
-  }
-
-private:
-  bool visit(const luci::CircleConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
-    return true;
-  }
-
-  bool visit(const luci::CircleConcatenation *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    for (uint32_t i = 0; i < node->numValues(); i++)
-    {
-      RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
-    }
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthToSpace *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthwiseConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
-    return true;
-  }
-
-  bool visit(const luci::CircleInstanceNorm *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->gamma()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->beta()))
-    return true;
-  }
-
-  bool visit(const luci::CirclePack *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    for (uint32_t i = 0; i < node->values_count(); i++)
-    {
-      RETURN_FALSE_UNLESS(is_lwq(node->values(i)));
-    }
-    return true;
-  }
-
-  bool visit(const luci::CirclePad *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    return true;
-  }
-
-  bool visit(const luci::CirclePadV2 *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq(node->constant_values()))
-    return true;
-  }
-
-  bool visit(const luci::CircleMirrorPad *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    return true;
-  }
-
-  bool visit(const luci::CirclePRelu *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->alpha()))
-    return true;
-  }
-
-  bool visit(const luci::CircleTransposeConv *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->outBackprop()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->filter()))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
-    return true;
-  }
-
-  bool visit(const luci::CircleFullyConnected *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()))
-    RETURN_FALSE_UNLESS(is_lwq_const(node->weights()))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(is_lwq_const(node->bias()))
-    return true;
-  }
-
-  bool visit(const luci::CircleAdd *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleAveragePool2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->value()));
-    return true;
-  }
-
-  bool visit(const luci::CircleLogicalOr *)
-  {
-    // Logical OR has bool-type inputs and output
-    // Nothing to be checked
-    return true;
-  }
-
-  bool visit(const luci::CircleMaxPool2D *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->value()));
-    return true;
-  }
-
-  bool visit(const luci::CircleLocalResponseNormalization *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleMean *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleMul *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleNotEqual *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleRelu *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node))
-    RETURN_FALSE_UNLESS(is_lwq(node->features()));
-    return true;
-  }
-
-  bool visit(const luci::CircleReshape *node)
-  {
-    auto input = loco::must_cast<const luci::CircleNode *>(node->tensor());
-    bool input_quantized = input->quantparam() != nullptr;
-    bool node_quantized = node->quantparam() != nullptr;
-    RETURN_FALSE_UNLESS(input_quantized == node_quantized);
-    RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node))
-    RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
-    return true;
-  }
-
-  bool visit(const luci::CircleLogistic *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSoftmax *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->logits()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToBatchND *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToDepth *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSlice *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplit *node)
-  {
-    // node's output is the input of CircleSplitOut, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitOut *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitV *node)
-  {
-    // node's output is the input of CircleSplitVOut, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitVOut *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    return true;
-  }
-
-  bool visit(const luci::CircleStridedSlice *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleArgMax *node)
-  {
-    // node's output is index, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleBatchToSpaceND *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleTanh *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleTranspose *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->a()));
-    return true;
-  }
-
-  bool visit(const luci::CircleFloor *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleGreater *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleGreaterEqual *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleDiv *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleFloorDiv *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleRsqrt *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleSqrt *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    return true;
-  }
-
-  bool visit(const luci::CircleElu *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->features()));
-    return true;
-  }
-
-  bool visit(const luci::CirclePow *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->x()));
-    RETURN_FALSE_UNLESS(is_lwq(node->y()));
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeBilinear *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeNearestNeighbor *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    RETURN_FALSE_UNLESS(is_lwq(node->input()));
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpack *node)
-  {
-    // node's output is the input of CircleUnpackOut, thus not quantized
-    RETURN_FALSE_UNLESS(is_lwq(node->value()));
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpackOut *node)
-  {
-    RETURN_FALSE_UNLESS(is_lwq(node));
-    return true;
-  }
-
-  bool visit(const luci::CircleCast *node)
-  {
-    auto input = loco::must_cast<const luci::CircleNode *>(node->x());
-    bool input_quantized = input->quantparam() != nullptr;
-    bool node_quantized = node->quantparam() != nullptr;
-    RETURN_FALSE_UNLESS(not input_quantized or is_lwq(input));
-    RETURN_FALSE_UNLESS(not node_quantized or is_lwq(node));
-    return true;
-  }
-
-  // TODO: Implement more Ops
-
-  bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUANTIZED_NODE_LAYERWISE_GRANULARITY_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
deleted file mode 100644 (file)
index eeec7b8..0000000
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_S16_TYPE_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-
-#include <cmath>
-
-using Type = loco::DataType;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
-  if (not(ARG))                  \
-  {                              \
-    return false;                \
-  }
-
-namespace luci
-{
-
-/**
- * @brief Verify the data type of INT16 quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeS16Type final : public luci::CircleNodeVisitor<bool>
-{
-private:
-  bool has_type(const loco::Node *node, Type dtype)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
-    return circle_node->dtype() == dtype;
-  }
-
-private:
-  bool visit(const luci::CircleConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleConcatenation *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    for (uint32_t i = 0; i < node->numValues(); i++)
-    {
-      RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16))
-    }
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthToSpace *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthwiseConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleInstanceNorm *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->beta(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CirclePack *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    for (uint32_t i = 0; i < node->values_count(); i++)
-    {
-      RETURN_FALSE_UNLESS(has_type(node->values(i), Type::S16))
-    }
-    return true;
-  }
-
-  bool visit(const luci::CirclePad *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CirclePadV2 *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
-    RETURN_FALSE_UNLESS(has_type(node->constant_values(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleMirrorPad *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CirclePRelu *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleTransposeConv *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->filter(), Type::S16))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(has_type(bias, Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleFullyConnected *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->weights(), Type::S16))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(has_type(bias, Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleAdd *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleAveragePool2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleLogicalOr *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
-    return true;
-  }
-
-  bool visit(const luci::CircleMaxPool2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleLocalResponseNormalization *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleMean *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleMul *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleNotEqual *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleRelu *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleReshape *node)
-  {
-    if (node->quantparam())
-    {
-      RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-      RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::S16))
-    }
-    else
-    {
-      RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
-    }
-    luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
-    if (shape != nullptr)
-      RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleLogistic *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
-    return true;
-  }
-
-  bool visit(const luci::CircleSoftmax *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->logits(), Type::S16))
-
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToBatchND *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToDepth *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleSlice *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
-    RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleSplit *node)
-  {
-    // node's output is the input of CircleSplitOut, thus not quantized
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitOut *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-
-    // SplitOut has the same qparam with the input of Split
-    auto split = loco::must_cast<luci::CircleSplit *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(split->input());
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitV *node)
-  {
-    // node's output is the input of CircleSplitVOut, thus not quantized
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitVOut *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-
-    // SplitVOut has the same qparam with the input of SplitV
-    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleStridedSlice *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-
-    auto input = loco::must_cast<luci::CircleNode *>(node->input());
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleArgMax *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
-                        has_type(node->dimension(), Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleBatchToSpaceND *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleTanh *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
-    return true;
-  }
-
-  bool visit(const luci::CircleTranspose *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->a(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleFloor *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-
-    // This checks the value of scale is an integer
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleGreater *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleGreaterEqual *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleDiv *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleFloorDiv *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-
-    // This checks the value of scale is an integer
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleRsqrt *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleSqrt *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleElu *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->features(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CirclePow *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeBilinear *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeNearestNeighbor *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpack *node)
-  {
-    // node's output is the input of CircleUnpackOut, thus not quantized
-    RETURN_FALSE_UNLESS(has_type(node->value(), Type::S16))
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpackOut *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-
-    // UnpackOut has the same qparam with the input of Unpack
-    auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(Unpack->value());
-    RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleCast *node)
-  {
-    auto *input = loco::must_cast<luci::CircleNode *>(node->x());
-    RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
-
-    bool input_quantized = input->quantparam() != nullptr;
-    if (input_quantized)
-      RETURN_FALSE_UNLESS(has_type(input, Type::S16))
-
-    RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
-
-    bool node_quantized = node->quantparam() != nullptr;
-    if (node_quantized)
-      RETURN_FALSE_UNLESS(has_type(node, Type::S16))
-    return true;
-  }
-
-  // TODO: Implement more Ops
-
-  bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUNTIZED_NODE_S16_TYPE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp b/compiler/luci/pass/src/VerifyQuantizedNodeType.cpp
new file mode 100644 (file)
index 0000000..4e1c062
--- /dev/null
@@ -0,0 +1,554 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "VerifyQuantizedNodeType.h"
+
+#include <cmath>
+#include <memory>
+
+// This macro is undef at the end of the file
+#define RETURN_FALSE_UNLESS(ARG) \
+  if (not(ARG))                  \
+  {                              \
+    return false;                \
+  }
+
+namespace luci
+{
+
+std::shared_ptr<VerifyQuantizedNodeType> VerifyQuantizedNodeType::create(loco::DataType dtype)
+{
+  if (dtype == loco::DataType::U8)
+    return std::make_shared<VerifyQuantizedNodeU8Type>();
+  else if (dtype == loco::DataType::S16)
+    return std::make_shared<VerifyQuantizedNodeS16Type>();
+  else
+    throw std::domain_error("Not supported Quantized type");
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAdd *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleArgMax *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->dimension(), loco::DataType::S32) ||
+                      has_type(node->dimension(), loco::DataType::S64))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleAveragePool2D *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleBatchToSpaceND *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleCast *node)
+{
+  auto *input = loco::must_cast<luci::CircleNode *>(node->x());
+  bool input_quantized = input->quantparam() != nullptr;
+  if (input_quantized)
+  {
+    RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
+    RETURN_FALSE_UNLESS(has_type(input, Qtype))
+  }
+
+  bool node_quantized = node->quantparam() != nullptr;
+  if (node_quantized)
+  {
+    RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
+    RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  }
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConv2D *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->bias(), Btype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleConcatenation *node)
+{
+  // Allow concatenation of indices
+  if (group_has_type(node, loco::DataType::S32) or group_has_type(node, loco::DataType::S64))
+    return true;
+
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthToSpace *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDepthwiseConv2D *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->bias(), Btype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleDiv *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleElu *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloor *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, Qtype));
+
+  // This checks the value of scale is an integer
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFloorDiv *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, Qtype));
+
+  // This checks the value of scale is an integer
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleFullyConnected *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->weights(), Qtype))
+  luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+  if (bias != nullptr)
+    RETURN_FALSE_UNLESS(has_type(bias, Btype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreater *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+  RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleGreaterEqual *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+  RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleInstanceNorm *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(
+  const luci::CircleLocalResponseNormalization *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleLogicalOr *node)
+{
+  return group_has_type(node, loco::DataType::BOOL);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMaxPool2D *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMean *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), loco::DataType::S32))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMirrorPad *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleMul *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleNotEqual *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, loco::DataType::BOOL))
+  RETURN_FALSE_UNLESS(has_type(node->x(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->y(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleOneHot *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype));
+  RETURN_FALSE_UNLESS(has_type(node->indices(), loco::DataType::S32) ||
+                      has_type(node->indices(), loco::DataType::S64));
+  RETURN_FALSE_UNLESS(has_type(node->depth(), loco::DataType::S32));
+  RETURN_FALSE_UNLESS(has_type(node->on_value(), Qtype));
+  RETURN_FALSE_UNLESS(has_type(node->off_value(), Qtype));
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePack *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePad *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePadV2 *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->paddings(), loco::DataType::S32))
+  RETURN_FALSE_UNLESS(has_type(node->constant_values(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePRelu *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CirclePow *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRelu *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleReshape *node)
+{
+  if (node->quantparam())
+  {
+    RETURN_FALSE_UNLESS(has_type(node, Qtype))
+    RETURN_FALSE_UNLESS(has_type(node->tensor(), Qtype))
+  }
+  else
+  {
+    RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
+  }
+  luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
+  if (shape != nullptr)
+    RETURN_FALSE_UNLESS(has_type(shape, loco::DataType::S32))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeBilinear *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleResizeNearestNeighbor *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleRsqrt *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSlice *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->begin(), loco::DataType::S32) ||
+                      has_type(node->begin(), loco::DataType::S64))
+  RETURN_FALSE_UNLESS(has_type(node->size(), loco::DataType::S32) ||
+                      has_type(node->size(), loco::DataType::S64))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToBatchND *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSpaceToDepth *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplit *node)
+{
+  // node's output is the input of CircleSplitOut, thus not quantized
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitOut *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+  // SplitOut has the same qparam with the input of Split; guard both before dereference
+  auto split = loco::must_cast<luci::CircleSplit *>(node->input());
+  auto input = loco::must_cast<luci::CircleNode *>(split->input());
+  RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitV *node)
+{
+  // node's output is the input of CircleSplitVOut, thus not quantized
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSplitVOut *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+  // SplitVOut has the same qparam with the input of SplitV; guard both before dereference
+  auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+  auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+  RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleSqrt *node)
+{
+  return group_has_type(node, Qtype);
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleStridedSlice *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->input(), Qtype))
+
+  // Output must share qparam with the input; guard both qparams before dereference
+  auto input = loco::must_cast<luci::CircleNode *>(node->input());
+  RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTranspose *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->a(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->perm(), loco::DataType::S32))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleTransposeConv *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Qtype))
+  RETURN_FALSE_UNLESS(has_type(node->filter(), Qtype))
+  luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
+  if (bias != nullptr)
+    RETURN_FALSE_UNLESS(has_type(bias, Btype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpack *node)
+{
+  // node's output is the input of CircleUnpackOut, thus not quantized
+  RETURN_FALSE_UNLESS(has_type(node->value(), Qtype))
+  return true;
+}
+
+template <loco::DataType Qtype, loco::DataType Btype>
+bool VerifyQuantizedNodeTypeBase<Qtype, Btype>::visit(const luci::CircleUnpackOut *node)
+{
+  RETURN_FALSE_UNLESS(has_type(node, Qtype))
+
+  // UnpackOut has the same qparam with the input of Unpack
+  auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
+  auto input = loco::must_cast<luci::CircleNode *>(Unpack->value());
+  RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+  return true;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleTanh *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128);
+  return true;
+}
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleLogistic *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+  return true;
+}
+
+bool VerifyQuantizedNodeU8Type::visit(const luci::CircleSoftmax *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::U8));
+
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+  return true;
+}
+
+} // namespace luci
+
+namespace luci
+{
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleTanh *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+  return true;
+}
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleLogistic *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32768.0f);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+  return true;
+}
+
+bool VerifyQuantizedNodeS16Type::visit(const luci::CircleSoftmax *node)
+{
+  RETURN_FALSE_UNLESS(group_has_type(node, loco::DataType::S16));
+
+  RETURN_FALSE_UNLESS(node->quantparam());
+  RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 32767.0f);
+  RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
+  return true;
+}
+
+} // namespace luci
+
+#undef RETURN_FALSE_UNLESS
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeType.h b/compiler/luci/pass/src/VerifyQuantizedNodeType.h
new file mode 100644 (file)
index 0000000..ff1acbd
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
+#define __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/CircleNodeVisitor.h>
+
+namespace luci
+{
+
+/**
+ * @brief Verify the data type of quantized node
+ * @details
+ *
+ * Targets to verify
+ * - node's output (i.e., node itself)
+ * - node's inputs
+ */
+class VerifyQuantizedNodeType
+{
+public:
+  static std::shared_ptr<VerifyQuantizedNodeType> create(loco::DataType dtype);
+
+public:
+  virtual bool verify(luci::CircleNode *node) = 0;
+};
+
+/**
+ * @brief Verify using quantization type of a node and bias
+ *
+ * @tparam Qtype Quantization type for a node (e.g. Q8, Q16, ...)
+ * @tparam Btype Bias quantization type (e.g. For Q8, S32 is used)
+ */
+template <loco::DataType Qtype, loco::DataType Btype>
+class VerifyQuantizedNodeTypeBase : public luci::CircleNodeVisitor<bool>,
+                                    public VerifyQuantizedNodeType
+{
+public:
+  bool verify(luci::CircleNode *node) { return node->accept(this); }
+
+protected:
+  bool has_type(const loco::Node *node, loco::DataType dtype)
+  {
+    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
+    return circle_node->dtype() == dtype;
+  }
+
+  // Check whether a node and all of its inputs have dtype or not
+  bool group_has_type(const loco::Node *node, loco::DataType dtype)
+  {
+    if (!has_type(node, dtype))
+      return false;
+
+    for (uint32_t i = 0; i < node->arity(); ++i)
+      if (!has_type(node->arg(i), dtype))
+        return false;
+
+    return true;
+  }
+
+private:
+  bool visit(const luci::CircleAdd *node);
+  bool visit(const luci::CircleArgMax *node);
+  bool visit(const luci::CircleAveragePool2D *node);
+  bool visit(const luci::CircleBatchToSpaceND *node);
+  bool visit(const luci::CircleCast *node);
+  bool visit(const luci::CircleConv2D *node);
+  bool visit(const luci::CircleConcatenation *node);
+  bool visit(const luci::CircleDepthToSpace *node);
+  bool visit(const luci::CircleDepthwiseConv2D *node);
+  bool visit(const luci::CircleDiv *node);
+  bool visit(const luci::CircleElu *node);
+  bool visit(const luci::CircleFloor *node);
+  bool visit(const luci::CircleFloorDiv *node);
+  bool visit(const luci::CircleFullyConnected *node);
+  bool visit(const luci::CircleGreater *node);
+  bool visit(const luci::CircleGreaterEqual *node);
+  bool visit(const luci::CircleInstanceNorm *node);
+  bool visit(const luci::CircleLocalResponseNormalization *node);
+  bool visit(const luci::CircleLogicalOr *node);
+  bool visit(const luci::CircleMaxPool2D *node);
+  bool visit(const luci::CircleMean *node);
+  bool visit(const luci::CircleMirrorPad *node);
+  bool visit(const luci::CircleMul *node);
+  bool visit(const luci::CircleNotEqual *node);
+  bool visit(const luci::CircleOneHot *node);
+  bool visit(const luci::CirclePack *node);
+  bool visit(const luci::CirclePad *node);
+  bool visit(const luci::CirclePadV2 *node);
+  bool visit(const luci::CirclePRelu *node);
+  bool visit(const luci::CirclePow *node);
+  bool visit(const luci::CircleRelu *node);
+  bool visit(const luci::CircleReshape *node);
+  bool visit(const luci::CircleResizeBilinear *node);
+  bool visit(const luci::CircleResizeNearestNeighbor *node);
+  bool visit(const luci::CircleRsqrt *node);
+  bool visit(const luci::CircleSlice *node);
+  bool visit(const luci::CircleSpaceToBatchND *node);
+  bool visit(const luci::CircleSpaceToDepth *node);
+  bool visit(const luci::CircleSplit *node);
+  bool visit(const luci::CircleSplitOut *node);
+  bool visit(const luci::CircleSplitV *node);
+  bool visit(const luci::CircleSplitVOut *node);
+  bool visit(const luci::CircleSqrt *node);
+  bool visit(const luci::CircleStridedSlice *node);
+  bool visit(const luci::CircleTranspose *node);
+  bool visit(const luci::CircleTransposeConv *node);
+  bool visit(const luci::CircleUnpack *node);
+  bool visit(const luci::CircleUnpackOut *node);
+
+  // NOTE below nodes have different implementations for Qtype/Btype and
+  //      implementations exist in VerifyQuantizedNodeU8Type, VerifyQuantizedNodeS16Type
+  // bool visit(const luci::CircleLogistic *node);
+  // bool visit(const luci::CircleSoftmax *node);
+  // bool visit(const luci::CircleTanh *node);
+
+  // TODO: Implement more Ops
+
+  bool visit(const luci::CircleNode *) { return true; }
+};
+
+class VerifyQuantizedNodeU8Type
+  : public VerifyQuantizedNodeTypeBase<loco::DataType::U8, loco::DataType::S32>
+{
+private:
+  bool visit(const luci::CircleLogistic *node);
+  bool visit(const luci::CircleSoftmax *node);
+  bool visit(const luci::CircleTanh *node);
+};
+
+class VerifyQuantizedNodeS16Type
+  : public VerifyQuantizedNodeTypeBase<loco::DataType::S16, loco::DataType::S64>
+{
+private:
+  bool visit(const luci::CircleLogistic *node);
+  bool visit(const luci::CircleSoftmax *node);
+  bool visit(const luci::CircleTanh *node);
+};
+
+} // namespace luci
+
+#endif // __LUCI_VERIFY_QUANTIZED_NODE_TYPE_H__
diff --git a/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h b/compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
deleted file mode 100644 (file)
index e7dd1b0..0000000
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
-#define __LUCI_VERIFY_QUANTIZED_NODE_U8_TYPE_H__
-
-#include <luci/IR/CircleNodes.h>
-#include <luci/IR/CircleNodeVisitor.h>
-
-#include <cmath>
-
-using Type = loco::DataType;
-
-// This macro is undef at the end of the file
-#define RETURN_FALSE_UNLESS(ARG) \
-  if (not(ARG))                  \
-  {                              \
-    return false;                \
-  }
-
-namespace luci
-{
-
-/**
- * @brief Verify the data type of UINT8 quantized node
- * @details
- *
- * Targets to verify
- * - node's output (i.e., node itself)
- * - node's inputs
- */
-struct VerifyQuantizedNodeU8Type final : public luci::CircleNodeVisitor<bool>
-{
-private:
-  bool has_type(const loco::Node *node, Type dtype)
-  {
-    auto circle_node = loco::must_cast<const luci::CircleNode *>(node);
-    return circle_node->dtype() == dtype;
-  }
-
-private:
-  bool visit(const luci::CircleConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleConcatenation *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    for (uint32_t i = 0; i < node->numValues(); i++)
-    {
-      RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8))
-    }
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthToSpace *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleDepthwiseConv2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->bias(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleInstanceNorm *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->gamma(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->beta(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CirclePack *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    for (uint32_t i = 0; i < node->values_count(); i++)
-    {
-      RETURN_FALSE_UNLESS(has_type(node->values(i), Type::U8))
-    }
-    return true;
-  }
-
-  bool visit(const luci::CirclePad *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CirclePadV2 *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
-    RETURN_FALSE_UNLESS(has_type(node->constant_values(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleMirrorPad *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->paddings(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CirclePRelu *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->alpha(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleTransposeConv *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->outBackprop(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->filter(), Type::U8))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(has_type(bias, Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleFullyConnected *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->weights(), Type::U8))
-    luci::CircleConst *bias = dynamic_cast<luci::CircleConst *>(node->bias());
-    if (bias != nullptr)
-      RETURN_FALSE_UNLESS(has_type(bias, Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleAdd *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleAveragePool2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleBatchToSpaceND *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleLogicalOr *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::BOOL))
-    return true;
-  }
-
-  bool visit(const luci::CircleMaxPool2D *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleLocalResponseNormalization *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleMean *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->reduction_indices(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleMul *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleNotEqual *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleRelu *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleReshape *node)
-  {
-    if (node->quantparam())
-    {
-      RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-      RETURN_FALSE_UNLESS(has_type(node->tensor(), Type::U8))
-    }
-    else
-    {
-      RETURN_FALSE_UNLESS(has_type(node->tensor(), node->dtype()))
-    }
-    luci::CircleConst *shape = dynamic_cast<luci::CircleConst *>(node->shape());
-    if (shape != nullptr)
-      RETURN_FALSE_UNLESS(has_type(shape, Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleLogistic *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 256.0f);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
-    return true;
-  }
-
-  bool visit(const luci::CircleSoftmax *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->logits(), Type::U8))
-
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 1.0f / 255.0f);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 0);
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToBatchND *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleSpaceToDepth *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleSlice *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->begin(), Type::S32) || has_type(node->begin(), Type::S64))
-    RETURN_FALSE_UNLESS(has_type(node->size(), Type::S32) || has_type(node->size(), Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleSplit *node)
-  {
-    // node's output is the input of CircleSplitOut, thus not quantized
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitOut *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-
-    // SplitOut has the same qparam with the input of Split
-    auto split = loco::must_cast<luci::CircleSplit *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(split->input());
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitV *node)
-  {
-    // node's output is the input of CircleSplitVOut, thus not quantized
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleSplitVOut *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-
-    // SplitVOut has the same qparam with the input of SplitV
-    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleStridedSlice *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-
-    auto input = loco::must_cast<luci::CircleNode *>(node->input());
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleArgMax *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, node->output_type()))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->dimension(), Type::S32) ||
-                        has_type(node->dimension(), Type::S64))
-    return true;
-  }
-
-  bool visit(const luci::CircleTanh *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == 2.0f / 256.0f);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == 128);
-    return true;
-  }
-
-  bool visit(const luci::CircleTranspose *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->a(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->perm(), Type::S32))
-    return true;
-  }
-
-  bool visit(const luci::CircleFloor *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-
-    // This checks the value of scale is an integer
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleGreater *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleGreaterEqual *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::BOOL))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleDiv *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleFloorDiv *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-
-    // This checks the value of scale is an integer
-    RETURN_FALSE_UNLESS(node->quantparam());
-    RETURN_FALSE_UNLESS(std::roundf(node->quantparam()->scale[0]) == node->quantparam()->scale[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleRsqrt *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleSqrt *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleElu *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->features(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CirclePow *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->x(), Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->y(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeBilinear *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleResizeNearestNeighbor *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpack *node)
-  {
-    // node's output is the input of CircleUnpackOut, thus not quantized
-    RETURN_FALSE_UNLESS(has_type(node->value(), Type::U8))
-    return true;
-  }
-
-  bool visit(const luci::CircleUnpackOut *node)
-  {
-    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-
-    // UnpackOut has the same qparam with the input of Unpack
-    auto Unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
-    auto input = loco::must_cast<luci::CircleNode *>(Unpack->value());
-    RETURN_FALSE_UNLESS(node->quantparam() && input->quantparam());
-    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
-    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
-    return true;
-  }
-
-  bool visit(const luci::CircleCast *node)
-  {
-    auto *input = loco::must_cast<luci::CircleNode *>(node->x());
-    bool input_quantized = input->quantparam() != nullptr;
-    if (input_quantized)
-    {
-      RETURN_FALSE_UNLESS(has_type(input, node->in_data_type()))
-      RETURN_FALSE_UNLESS(has_type(input, Type::U8))
-    }
-
-    bool node_quantized = node->quantparam() != nullptr;
-    if (node_quantized)
-    {
-      RETURN_FALSE_UNLESS(has_type(node, node->out_data_type()))
-      RETURN_FALSE_UNLESS(has_type(node, Type::U8))
-    }
-    return true;
-  }
-
-  // TODO: Implement more Ops
-
-  bool visit(const luci::CircleNode *) { return true; }
-};
-
-} // namespace luci
-
-#undef RETURN_FALSE_UNLESS
-
-#endif // __LUCI_VERIFY_QUNTIZED_NODE_U8_TYPE_H__
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.cpp
new file mode 100644 (file)
index 0000000..ac07f9e
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayerInfoMap.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <cassert>
+
+namespace luci
+{
+namespace
+{
+
+bool is_multiple_output_node(const luci::CircleNode *node)
+{
+  switch (node->opcode())
+  {
+    // The following nodes have multiple outputs. Output tensors are not produced by themselves but
+    // by the corresponding *Out nodes.
+    case luci::CircleOpcode::SPLIT:
+    case luci::CircleOpcode::SPLIT_V:
+    case luci::CircleOpcode::TOPK_V2:
+    case luci::CircleOpcode::UNIQUE:
+    case luci::CircleOpcode::UNPACK:
+      return true;
+    // TODO: Support these ops
+    case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+    case luci::CircleOpcode::CUSTOM:
+    case luci::CircleOpcode::IF:
+    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+    case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+    case luci::CircleOpcode::WHILE:
+      throw std::runtime_error("Unsupported op now");
+    default:
+      return false;
+  }
+}
+
+const luci::CircleNode *get_multi_output_node(const luci::CircleNode *node)
+{
+  if (is_multiple_output_node(node))
+    return node;
+
+  switch (node->opcode())
+  {
+    // The following nodes denote outputs of multiple-output nodes.
+    case luci::CircleOpcode::CIRCLESPLITOUT:
+    {
+      const auto split_out = loco::must_cast<const CircleSplitOut *>(node);
+      return loco::must_cast<luci::CircleNode *>(split_out->input());
+    }
+    case luci::CircleOpcode::CIRCLESPLITVOUT:
+    {
+      const auto splitv_out = loco::must_cast<const CircleSplitVOut *>(node);
+      return loco::must_cast<luci::CircleNode *>(splitv_out->input());
+    }
+    case luci::CircleOpcode::CIRCLETOPKV2OUT:
+    {
+      const auto top_kv2_out = loco::must_cast<const CircleTopKV2Out *>(node);
+      return loco::must_cast<luci::CircleNode *>(top_kv2_out->input());
+    }
+    case luci::CircleOpcode::CIRCLEUNIQUEOUT:
+    {
+      const auto unique_out = loco::must_cast<const CircleUniqueOut *>(node);
+      return loco::must_cast<luci::CircleNode *>(unique_out->input());
+    }
+    case luci::CircleOpcode::CIRCLEUNPACKOUT:
+    {
+      const auto unpack_out = loco::must_cast<const CircleUnpackOut *>(node);
+      return loco::must_cast<luci::CircleNode *>(unpack_out->input());
+    }
+    // TODO: Support these ops
+    case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+    case luci::CircleOpcode::CIRCLECUSTOMOUT:
+    case luci::CircleOpcode::CIRCLEIFOUT:
+    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+    case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
+    case luci::CircleOpcode::CIRCLEWHILEOUT:
+      throw std::runtime_error("Unsupported op now");
+    default:
+      return nullptr;
+  }
+}
+
+bool same_setting(const LayerInfo &left, const LayerInfo &right)
+{
+  return left.dtype == right.dtype and left.granularity == right.granularity;
+}
+
+void add_multi_output_node(LayerInfoMap &info_by_name, LayerInfo &layer_info,
+                           const luci::CircleNode *node)
+{
+  assert(is_multiple_output_node(node)); // FIX_CALLER_UNLESS
+
+  const auto succs_nodes = loco::succs(node);
+  const auto name = node->name();
+
+  if (info_by_name.find(name) != info_by_name.end())
+  {
+    // Check that all outputs have equal dtype and granularity
+    for (const auto succs_node : succs_nodes)
+    {
+      const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node);
+
+      const auto it = info_by_name.find(succs_circle_node->name());
+      if (it != info_by_name.end() and not same_setting(layer_info, (it->second)))
+        throw std::runtime_error("Outputs of multiple-output nodes should have equal dtype and "
+                                 "granularity. Check the quantization configuration file");
+    }
+    return;
+  }
+
+  // Add multiple-output node to info_by_name
+  info_by_name[name] = {name, layer_info.dtype, layer_info.granularity};
+
+  // Add output nodes to info_by_name
+  for (const auto succs_node : succs_nodes)
+  {
+    const auto succs_circle_node = loco::must_cast<luci::CircleNode *>(succs_node);
+    const auto succs_circle_node_name = succs_circle_node->name();
+    info_by_name[succs_circle_node_name] = {succs_circle_node_name, layer_info.dtype,
+                                            layer_info.granularity};
+  }
+}
+
+} // namespace
+
+LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info)
+{
+  LayerInfoMap info_by_name;
+
+  for (auto &&info : layers_info)
+  {
+    auto name = info.name;
+    bool found = false;
+    for (auto node : loco::active_nodes(loco::output_nodes(g)))
+    {
+      auto cnode = loco::must_cast<luci::CircleNode *>(node);
+      if (cnode->opcode() == luci::CircleOpcode::CIRCLEOUTPUT)
+        continue;
+
+      if (cnode->name() == name)
+      {
+        // Check and add multiple-output node and its outputs to info_by_name
+        if (const auto multi_output = get_multi_output_node(cnode))
+        {
+          add_multi_output_node(info_by_name, info, multi_output);
+          found = true;
+          continue;
+        }
+
+        if (info_by_name.find(name) != info_by_name.end())
+        {
+          throw std::runtime_error("Duplicate layer name " + name +
+                                   ". Check layer names in the quantization configuration file.");
+        }
+
+        info_by_name[name] = info;
+        found = true;
+        continue;
+      }
+    }
+
+    if (not found)
+      throw std::runtime_error("No such layer named " + name +
+                               ". Check layer names in the quantization configuration file.");
+  }
+
+  // TODO Check all names in layers_info exist in the info_by_name
+  // TODO Check names in info_by_name but not in layers_info are from virtual outputs
+
+  return info_by_name;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.h b/compiler/luci/pass/src/helpers/LayerInfoMap.h
new file mode 100644 (file)
index 0000000..bb4724a
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
+#define __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
+
+#include <luci/Pass/QuantizationParameters.h>
+
+#include <unordered_map>
+
+namespace luci
+{
+
+using LayerInfoMap = std::unordered_map<std::string, luci::LayerInfo>;
+
+LayerInfoMap layer_info_map(loco::Graph *g, std::vector<LayerInfo> &layers_info);
+
+} // namespace luci
+
+#endif // __LUCI_PASS_HELPERS_LAYER_INFO_MAP_H__
diff --git a/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp b/compiler/luci/pass/src/helpers/LayerInfoMap.test.cpp
new file mode 100644 (file)
index 0000000..2ed28ed
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "LayerInfoMap.h"
+
+#include <luci/IR/CircleNode.h>
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class SoftmaxTestGraph : public luci::test::TestIOGraph
+{
+public:
+  void init(void)
+  {
+    TestIOGraph::init({32}, {32});
+    _softmax = g()->nodes()->create<luci::CircleSoftmax>();
+    {
+      _softmax->logits(input());
+      _softmax->beta(0.1);
+      _softmax->name("test");
+    }
+    output()->from(_softmax);
+  }
+
+private:
+  luci::CircleSoftmax *_softmax = nullptr;
+};
+
+class SplitAddTestGraph : public luci::test::TestIOGraph
+{
+public:
+  void init(void)
+  {
+    TestIOGraph::init({6, 1, 2}, {3, 1, 2});
+    _split_dim = g()->nodes()->create<luci::CircleConst>();
+    {
+      _split_dim->rank(1);
+      _split_dim->dtype(loco::DataType::S32);
+      _split_dim->size<loco::DataType::S32>(1);
+      _split_dim->at<loco::DataType::S32>(0);
+      _split_dim->shape({1});
+      _split_dim->name("split_dim");
+    }
+
+    _split = g()->nodes()->create<luci::CircleSplit>();
+    {
+      _split->input(input());
+      _split->num_split(2);
+      _split->split_dim(_split_dim);
+      _split->name("split0");
+    }
+
+    _split_out_1 = g()->nodes()->create<luci::CircleSplitOut>();
+    {
+      _split_out_1->input(_split);
+      _split_out_1->index(0);
+      _split_out_1->name("split0");
+    }
+
+    _split_out_2 = g()->nodes()->create<luci::CircleSplitOut>();
+    {
+      _split_out_2->input(_split);
+      _split_out_2->index(1);
+      _split_out_2->name("split1");
+    }
+
+    _add = g()->nodes()->create<luci::CircleAdd>();
+    {
+      _add->x(_split_out_1);
+      _add->y(_split_out_2);
+      _add->name("add");
+    }
+    output()->from(_add);
+  }
+
+private:
+  luci::CircleSplit *_split = nullptr;
+  luci::CircleSplitOut *_split_out_1 = nullptr;
+  luci::CircleSplitOut *_split_out_2 = nullptr;
+  luci::CircleConst *_split_dim = nullptr;
+  luci::CircleAdd *_add = nullptr;
+};
+
+} // namespace
+
+TEST(LayerInfoMapTest, simple_test)
+{
+  SoftmaxTestGraph g;
+  g.init();
+
+  luci::LayerInfo info;
+  {
+    info.name = "test";
+    info.dtype = loco::DataType::U8;
+    info.granularity = luci::QuantizationGranularity::ChannelWise;
+  }
+  std::vector<luci::LayerInfo> v;
+  v.emplace_back(info);
+  auto map = luci::layer_info_map(g.g(), v);
+
+  EXPECT_EQ("test", map["test"].name);
+  EXPECT_EQ(loco::DataType::U8, map["test"].dtype);
+  EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["test"].granularity);
+}
+
+TEST(LayerInfoMapTest, multiple_output_node_test)
+{
+  SplitAddTestGraph g;
+  g.init();
+
+  luci::LayerInfo info;
+  {
+    info.name = "split0";
+    info.dtype = loco::DataType::U8;
+    info.granularity = luci::QuantizationGranularity::ChannelWise;
+  }
+  std::vector<luci::LayerInfo> v;
+  v.emplace_back(info);
+  auto map = luci::layer_info_map(g.g(), v);
+
+  EXPECT_EQ(map.size(), 2);
+  EXPECT_EQ("split0", map["split0"].name);
+  EXPECT_EQ("split1", map["split1"].name);
+
+  EXPECT_EQ(loco::DataType::U8, map["split0"].dtype);
+  EXPECT_EQ(luci::QuantizationGranularity::ChannelWise, map["split0"].granularity);
+}
+
+TEST(LayerInfoMapTest, invalid_layer_info_multiple_output_node_NEG)
+{
+  SplitAddTestGraph g;
+  g.init();
+
+  luci::LayerInfo info_0;
+  {
+    info_0.name = "split0";
+    info_0.dtype = loco::DataType::U8;
+    info_0.granularity = luci::QuantizationGranularity::ChannelWise;
+  }
+  luci::LayerInfo info_1;
+  {
+    info_1.name = "split1";
+    info_1.dtype = loco::DataType::S16;
+    info_1.granularity = luci::QuantizationGranularity::ChannelWise;
+  }
+  std::vector<luci::LayerInfo> v;
+  v.emplace_back(info_0);
+  v.emplace_back(info_1);
+
+  EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
+
+TEST(LayerInfoMapTest, duplicate_name_NEG)
+{
+  SoftmaxTestGraph g;
+  g.init();
+  g.input()->name("test");
+
+  luci::LayerInfo info;
+  {
+    info.name = "test";
+    info.dtype = loco::DataType::U8;
+    info.granularity = luci::QuantizationGranularity::ChannelWise;
+  }
+  std::vector<luci::LayerInfo> v;
+  v.emplace_back(info);
+  EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
+
+TEST(LayerInfoMapTest, no_name_NEG)
+{
+  SoftmaxTestGraph g;
+  g.init();
+
+  luci::LayerInfo info;
+  {
+    info.name = "noname";
+    info.dtype = loco::DataType::U8;
+    info.granularity = luci::QuantizationGranularity::ChannelWise;
+  }
+  std::vector<luci::LayerInfo> v;
+  v.emplace_back(info);
+  EXPECT_ANY_THROW(luci::layer_info_map(g.g(), v));
+}
index 3ccc5812834b3dbd793e26055881d6010049c1ec..e896188be111de0e2b5df957b489ef0136fb52e5 100644 (file)
@@ -4,8 +4,8 @@ require("loco")
 require("locop")
 require("logo")
 require("logo-core")
-require("mio-circle")
-require("mio-tflite")
+require("mio-circle04")
+require("mio-tflite280")
 require("oops")
 require("hermes")
 require("hermes-std")
index 0e6097f9631573397be201bb2fc45f14107ca2f1..24bdfc152c773bf3af29599f55bc36df81ac8132 100644 (file)
@@ -10,7 +10,6 @@ add_library(luci_service ${LUCI_LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_service PRIVATE src)
 target_include_directories(luci_service PUBLIC include)
 target_link_libraries(luci_service PUBLIC luci_lang)
-target_link_libraries(luci_service PUBLIC mio_circle)
 target_link_libraries(luci_service PUBLIC logo_core)
 target_link_libraries(luci_service PRIVATE luci_log)
 target_link_libraries(luci_service PRIVATE luci_logex)
index ead12d074d76a4b07b77b210925831535adbfa2d..2c112094116c823d146a5bfa43134e6d9025f02f 100644 (file)
 #ifndef __LUCI_CIRCLE_SHAPE_INFERENCE_H__
 #define __LUCI_CIRCLE_SHAPE_INFERENCE_H__
 
-#include <loco/IR/Nodes.h>
-
+#include <luci/Service/CircleShapeInferenceRule.h>
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleShapeInferenceRule.h>
+
+#include <loco/IR/NodeShape.h>
+#include <loco/IR/TensorShape.h>
 
 namespace luci
 {
index d627313802296729f2b48d7746b831b4450b86cb..e0ceabeac8637da6d8bb85399a40b1f272330636 100644 (file)
 #ifndef __LUCI_CIRCLE_TYPE_INFERENCE_H__
 #define __LUCI_CIRCLE_TYPE_INFERENCE_H__
 
-#include <loco/IR/Nodes.h>
-
-#include <mio/circle/schema_generated.h>
-
+#include <luci/Service/CircleTypeInferenceRule.h>
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
-#include <luci/Service/CircleTypeInferenceRule.h>
+
+#include <loco/IR/DataType.h>
 
 namespace luci
 {
index 3926147f56f1a402850cf69906132c2ff4a223fc..99e4561b393e55854c82c1bbcac7da13271c8f49 100644 (file)
@@ -208,6 +208,7 @@ public:
   luci::CircleNode *visit(const luci::CircleSquaredDifference *) final;
   luci::CircleNode *visit(const luci::CircleSqueeze *) final;
   luci::CircleNode *visit(const luci::CircleStridedSlice *) final;
+  luci::CircleNode *visit(const luci::CircleSVDF *) final;
   luci::CircleNode *visit(const luci::CircleSub *) final;
   luci::CircleNode *visit(const luci::CircleSum *) final;
   luci::CircleNode *visit(const luci::CircleTanh *) final;
@@ -269,6 +270,7 @@ public:
   luci::CircleNode *visit(const luci::CircleTopKV2Out *) final;
   luci::CircleNode *visit(const luci::CircleUniqueOut *) final;
   luci::CircleNode *visit(const luci::CircleUnpackOut *) final;
+  luci::CircleNode *visit(const luci::CircleVariable *) final;
   luci::CircleNode *visit(const luci::CircleWhileOut *) final;
 
   // Handle in CircleNode
index d2033dd0cc7aa355eb260c0f4d2b9a170b2a749e..220c6096cff80a9fd013d925eaab811166c539c0 100644 (file)
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "luci/IR/CircleQuantParam.h"
 #include "luci/Service/CircleNodeClone.h"
 
 #include "CircleCloneNode.h"
@@ -45,18 +46,7 @@ void copy_common_attributes(const luci::CircleNode *src, luci::CircleNode *dst)
   dst->shape_status(src->shape_status());
 
   // quantparam
-  const auto *quantparam = src->quantparam();
-  if (quantparam != nullptr)
-  {
-    auto qparam = std::make_unique<luci::CircleQuantParam>();
-    qparam->scale = quantparam->scale;
-    qparam->zerop = quantparam->zerop;
-    qparam->min = quantparam->min;
-    qparam->max = quantparam->max;
-    qparam->quantized_dimension = quantparam->quantized_dimension;
-
-    dst->quantparam(std::move(qparam));
-  }
+  copy_quantparam(src, dst);
 
   // sparsity
   const auto *sparsity = src->sparsityparam();
index 5d6a31050f2b218d418998bb2f68387a21eb7462..9d156f3e22fc912dcb65d56d747d9484de0640ea 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -196,23 +197,18 @@ template <class CIRCLENODE> loco::NodeShape broadcast_xy(const CIRCLENODE *node)
   return loco::NodeShape{output_shape};
 }
 
-template <class CIRCLENODE> loco::NodeShape use_inputs(const CIRCLENODE *node)
-{
-  auto inputs_shape = luci::shape_get(node->inputs()).template as<loco::TensorShape>();
-  return loco::NodeShape{inputs_shape};
-}
+#define DECLARE_USE_SINGLE(NAME)                                                        \
+  template <class CIRCLENODE> loco::NodeShape use_##NAME(const CIRCLENODE *node)        \
+  {                                                                                     \
+    auto inputs_shape = luci::shape_get(node->NAME()).template as<loco::TensorShape>(); \
+    return loco::NodeShape{inputs_shape};                                               \
+  }
 
-template <class CIRCLENODE> loco::NodeShape use_x(const CIRCLENODE *node)
-{
-  auto x_shape = luci::shape_get(node->x()).template as<loco::TensorShape>();
-  return loco::NodeShape{x_shape};
-}
+DECLARE_USE_SINGLE(inputs);
+DECLARE_USE_SINGLE(x);
+DECLARE_USE_SINGLE(logits);
 
-template <class CIRCLENODE> loco::NodeShape use_logits(const CIRCLENODE *node)
-{
-  auto shape = luci::shape_get(node->logits()).template as<loco::TensorShape>();
-  return loco::NodeShape{shape};
-}
+#undef DECLARE_USE_SINGLE
 
 template <class CIRCLENODE>
 loco::NodeShape use_paddings(const CIRCLENODE *node, const luci::CircleConst *paddings)
@@ -721,6 +717,8 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
   auto input_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
   auto weights_shape = luci::shape_get(node->weights()).as<loco::TensorShape>();
 
+// TODO Remove following unused code
+#if 0
   // Checking shape capability for fully connected layer
   // Input: a tensor of at least rank 2 [D1, D2, ... Dn]
   // Weight: [# of units, K]
@@ -741,6 +739,40 @@ loco::NodeShape infer_fully_connected(const luci::CircleFullyConnected *node)
   out_shape.rank(2);
   out_shape.dim(0) = batch_size;
   out_shape.dim(1) = weights_shape.dim(0);
+#endif
+
+  loco::TensorShape out_shape;
+
+  // NOTE Some recipes in some repositories are using rank 4 input for FullyConnected.
+  //      Until they are all fixed, disable following assert.
+  // TODO Enable following assert after related fixes are applied
+  // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L194
+  // LUCI_ASSERT(input_shape.rank() == 2 || input_shape.rank() == 3,
+  //             "Input rank of FullyConnected should be 2 or 3");
+
+  // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L225
+  LUCI_ASSERT(weights_shape.rank() == 2, "Weights of FullyConnected should be 2");
+
+  // https://github.com/tensorflow/tensorflow/blob/ea33c1e7a25d8025e8ee405ad8ab7be261798d76/tensorflow/lite/kernels/fully_connected.cc#L353-L367
+  if (node->keep_num_dims())
+  {
+    out_shape.rank(input_shape.rank());
+    for (uint32_t i = 0; i < input_shape.rank(); ++i)
+      out_shape.dim(i) = input_shape.dim(i);
+    out_shape.dim(out_shape.rank() - 1) = weights_shape.dim(0);
+  }
+  else
+  {
+    uint32_t input_size = 1;
+    for (uint32_t i = 0; i < input_shape.rank(); i++)
+    {
+      input_size = input_size * input_shape.dim(i).value();
+    }
+    const uint32_t batch_size = input_size / weights_shape.dim(1).value();
+    out_shape.rank(2);
+    out_shape.dim(0) = batch_size;
+    out_shape.dim(1) = weights_shape.dim(0);
+  }
 
   return loco::NodeShape{out_shape};
 }
@@ -1554,6 +1586,30 @@ loco::NodeShape infer_squeeze(const luci::CircleSqueeze *node)
   return loco::NodeShape{output_shape};
 }
 
+loco::NodeShape infer_svdf(const luci::CircleSVDF *node)
+{
+  const auto ifm_shape = luci::shape_get(node->input()).as<loco::TensorShape>();
+  const auto weight_feature_shape = luci::shape_get(node->weight_feature()).as<loco::TensorShape>();
+
+  assert(ifm_shape.rank() == 2);
+  assert(weight_feature_shape.rank() == 2);
+
+  assert(ifm_shape.dim(1) == weight_feature_shape.dim(1));
+  assert(weight_feature_shape.dim(0).known());
+
+  const auto rank = node->svdf_rank();
+  const auto num_filters = weight_feature_shape.dim(0).value();
+  assert(num_filters % rank == 0);
+  const auto num_units = num_filters / rank;
+
+  loco::TensorShape ofm_shape;
+  ofm_shape.rank(2);
+  ofm_shape.dim(0) = ifm_shape.dim(0);
+  ofm_shape.dim(1) = num_units;
+
+  return loco::NodeShape{ofm_shape};
+}
+
 loco::NodeShape infer_tile(const luci::CircleTile *node)
 {
   const loco::DataType S32 = loco::DataType::S32;
@@ -2393,6 +2449,8 @@ public:
     return loco::NodeShape{output_shape};
   }
 
+  loco::NodeShape visit(const luci::CircleSVDF *node) final { return infer_svdf(node); }
+
   loco::NodeShape visit(const luci::CircleTanh *node) final { return use_x(node); }
 
   loco::NodeShape visit(const luci::CircleTile *node) final { return infer_tile(node); }
@@ -2486,6 +2544,8 @@ public:
 
   loco::NodeShape visit(const luci::CircleUnpackOut *node) final { return infer_unpack_out(node); }
 
+  loco::NodeShape visit(const luci::CircleVariable *node) final { return use_own(node); }
+
   loco::NodeShape visit(const luci::CircleWhileOut *node) final { return infer_while_out(node); }
 };
 
index 5f6d46f2b74da99629f88393f6dffaf178d1ecea..438c4a364644291f87ef3779ab4a9c1c88e3aa6b 100644 (file)
@@ -478,6 +478,11 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
 
   loco::DataType visit(const luci::CircleSum *node) final { return luci::dtype_get(node->input()); }
 
+  loco::DataType visit(const luci::CircleSVDF *node) final
+  {
+    return luci::dtype_get(node->input());
+  }
+
   loco::DataType visit(const luci::CircleTanh *node) final { return luci::dtype_get(node->x()); }
 
   loco::DataType visit(const luci::CircleTile *node) final
@@ -605,6 +610,8 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
     return loco::DataType::S32;
   }
 
+  loco::DataType visit(const luci::CircleVariable *node) final { return node->dtype(); }
+
   loco::DataType visit(const luci::CircleUniqueOut *node) final
   {
     if (node->index() == 0)
diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.cpp
new file mode 100644 (file)
index 0000000..d4c3ce8
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNodeLet<CN::STUV>::visit(const luci::CircleSVDF *node)
+{
+  if (node->fusedActivationFunction() == luci::FusedActFunc::UNDEFINED)
+    return nullptr;
+
+  auto *cloned = _graph->nodes()->create<luci::CircleSVDF>();
+  if (cloned != nullptr)
+  {
+    cloned->fusedActivationFunction(node->fusedActivationFunction());
+    cloned->asymmetric_quantize_inputs(node->asymmetric_quantize_inputs());
+    cloned->svdf_rank(node->svdf_rank());
+  }
+  return cloned;
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp b/compiler/luci/service/src/Nodes/CircleSVDF.test.cpp
new file mode 100644 (file)
index 0000000..d6edaf1
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_SVDF)
+{
+  auto g = loco::make_graph();
+  auto node_svdf = g->nodes()->create<luci::CircleSVDF>();
+  node_svdf->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+  auto gc = loco::make_graph();
+  auto cloned = luci::clone_node(node_svdf, gc.get());
+  ASSERT_NE(nullptr, cloned);
+  ASSERT_EQ(gc.get(), cloned->graph());
+
+  auto cloned_svdf = dynamic_cast<luci::CircleSVDF *>(cloned);
+  ASSERT_NE(nullptr, cloned_svdf);
+  ASSERT_EQ(node_svdf->asymmetric_quantize_inputs(), cloned_svdf->asymmetric_quantize_inputs());
+  ASSERT_EQ(node_svdf->svdf_rank(), cloned_svdf->svdf_rank());
+}
+
+TEST(CloneNodeTest, clone_SVDF_NEG)
+{
+  auto g = loco::make_graph();
+  auto node_svdf = g->nodes()->create<luci::CircleSVDF>();
+  node_svdf->fusedActivationFunction(luci::FusedActFunc::UNDEFINED);
+
+  auto gc = loco::make_graph();
+  auto cloned = luci::clone_node(node_svdf, gc.get());
+  ASSERT_EQ(nullptr, cloned);
+}
diff --git a/compiler/luci/service/src/Nodes/CircleVariable.cpp b/compiler/luci/service/src/Nodes/CircleVariable.cpp
new file mode 100644 (file)
index 0000000..c1430bd
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleCloneNode.h"
+
+namespace luci
+{
+
+luci::CircleNode *CloneNode::visit(const luci::CircleVariable *)
+{
+  return _graph->nodes()->create<luci::CircleVariable>();
+}
+
+} // namespace luci
diff --git a/compiler/luci/service/src/Nodes/CircleVariable.test.cpp b/compiler/luci/service/src/Nodes/CircleVariable.test.cpp
new file mode 100644 (file)
index 0000000..7d29438
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Service/CircleNodeClone.h"
+
+#include <gtest/gtest.h>
+
+TEST(CloneNodeTest, clone_Variable)
+{
+  auto g = loco::make_graph();
+  auto node_dummy = g->nodes()->create<luci::CircleVariable>();
+
+  auto gc = loco::make_graph();
+  auto cloned = luci::clone_node(node_dummy, gc.get());
+  ASSERT_NE(nullptr, cloned);
+  ASSERT_EQ(gc.get(), cloned->graph());
+
+  auto cloned_variable = dynamic_cast<luci::CircleVariable *>(cloned);
+  ASSERT_NE(nullptr, cloned_variable);
+}
index c03835823e62ce43b889a270912225b9a0d003f2..1333efb7d80040407c7ec538f5e7c14da0016d9a 100644 (file)
@@ -1,3 +1,14 @@
+set(CIRCLECHEF_FILE_PATH $<TARGET_FILE:circlechef-file>)
+set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>)
+set(TFLITE2CIRCLE_PATH $<TARGET_FILE:tflite2circle>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  # TODO use better way to represent path for host executable
+  set(CIRCLECHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/circlechef/tools/file/circlechef-file)
+  set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file)
+  set(TFLITE2CIRCLE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflite2circle/tflite2circle)
+  message(STATUS "TFLITE2CIRCLE_PATH = ${TFLITE2CIRCLE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+
 # TODO use local test.recipe files for small networks
 file(GLOB RECIPES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*/test.recipe")
 
@@ -17,14 +28,14 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   # Generate .tflite
   add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
-                     COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
-                     DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+                     COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+                     DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
                      COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
 
   # Generate .circle
   add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
-                     COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
-                     DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+                     COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+                     DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}"
                      COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
 
   list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -52,14 +63,14 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   # Generate .tflite
   add_custom_command(OUTPUT "${RECIPE_OUTPUT_FILE}"
-                     COMMAND tflchef-file "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
-                     DEPENDS tflchef-file "${RECIPE_SOURCE_FILE}"
+                     COMMAND ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${RECIPE_OUTPUT_FILE}"
+                     DEPENDS ${TFLCHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
                      COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
 
   # Generate .circle
   add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
-                     COMMAND tflite2circle "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
-                     DEPENDS tflite2circle "${RECIPE_OUTPUT_FILE}"
+                     COMMAND ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}" "${CIRCLE_OUTPUT_FILE}"
+                     DEPENDS ${TFLITE2CIRCLE_PATH} "${RECIPE_OUTPUT_FILE}"
                      COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
 
   list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -87,8 +98,8 @@ foreach(RECIPE IN ITEMS ${RECIPES2})
 
   # Generate .circle
   add_custom_command(OUTPUT "${CIRCLE_OUTPUT_FILE}"
-                     COMMAND circlechef-file "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}"
-                     DEPENDS circlechef-file "${RECIPE_SOURCE_FILE}"
+                     COMMAND ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}" "${CIRCLE_OUTPUT_FILE}"
+                     DEPENDS ${CIRCLECHEF_FILE_PATH} "${RECIPE_SOURCE_FILE}"
                      COMMENT "Generating ${CIRCLE_OUTPUT_FILE}")
 
   list(APPEND TESTFILES "${CIRCLE_OUTPUT_FILE}")
@@ -111,6 +122,8 @@ include("test.lst")
 # Read "test.local.lst" if exists
 include("test.local.lst" OPTIONAL)
 
+# NOTE $<TARGET_FILE:luci_readtester> is used as-is as test itself should
+#      run in target device for cross build also
 add_test(NAME luci_unit_readtest
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/readverify.sh"
           "${CMAKE_CURRENT_BINARY_DIR}"
index 28ddcf672278909567e66b08f091f8ac895b426d..94e723f2189e81576c0eee14fdf1c8868dda9b86 100644 (file)
@@ -180,6 +180,8 @@ addread(Sub_000)
 addread(Sub_U8_000)
 addread(Sum_000)
 addread(Sum_001)
+addread(SVDF_000)
+addread(SVDF_001)
 addread(Tanh_000)
 addread(Tanh_U8_000)
 addread(Tile_000)
@@ -403,6 +405,8 @@ addwrite(Sub_000)
 addwrite(Sub_U8_000)
 addwrite(Sum_000)
 addwrite(Sum_001)
+addwrite(SVDF_000)
+addwrite(SVDF_001)
 addwrite(Tanh_000)
 addwrite(Tanh_U8_000)
 addwrite(Tile_000)
index fa05ef0fa024c52589bba8c53b53c355ed054cf3..d2471734398c49866f4c2b85e9f5494aac049656 100644 (file)
@@ -1,13 +1,14 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 
 if(NOT FlatBuffers_FOUND)
+  message(STATUS "mio-circle skip: FlatBuffers 2.0 NOT FOUND")
   return()
 endif(NOT FlatBuffers_FOUND)
 
 message(STATUS "Build mio-circle: TRUE")
 
 # TODO Find a better way
-set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.3/circle_schema.fbs")
 
 # NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
 add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
@@ -26,3 +27,10 @@ FlatBuffers_Target(mio_circle
 # This example shows how to use "mio-circle" library
 add_executable(mio_circle_example example.cpp)
 target_link_libraries(mio_circle_example mio_circle)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+add_library(mio_circle_helper STATIC ${SOURCES})
+target_include_directories(mio_circle_helper PRIVATE src)
+target_include_directories(mio_circle_helper PUBLIC include)
+target_link_libraries(mio_circle_helper mio_circle)
diff --git a/compiler/mio-circle/include/mio_circle/Helper.h b/compiler/mio-circle/include/mio_circle/Helper.h
new file mode 100644 (file)
index 0000000..c0f8115
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE_HELPER_H__
+#define __MIO_CIRCLE_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+namespace mio
+{
+namespace circle
+{
+
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE_HELPER_H__
diff --git a/compiler/mio-circle/src/Helper.cpp b/compiler/mio-circle/src/Helper.cpp
new file mode 100644 (file)
index 0000000..6f30c8c
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+  ::circle::BuiltinOperator code = opcode->builtin_code();
+  return (::circle::BuiltinOperator_MIN <= code && code <= ::circle::BuiltinOperator_MAX);
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+  ::circle::BuiltinOperator code = opcode->builtin_code();
+  return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+  assert(opcode);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid)";
+    return oss.str();
+  }
+
+  if (is_custom(opcode))
+  {
+    if (!opcode->custom_code())
+      return "(invalid custom)";
+
+    std::string custom_op = "CUSTOM(";
+    custom_op += opcode->custom_code()->c_str();
+    custom_op += ")";
+    return custom_op;
+  }
+
+  ::circle::BuiltinOperator code = opcode->builtin_code();
+  return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+  return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+  static const char *kEmptyTensorName = "(noname)";
+
+  auto name = tensor->name();
+  if (name)
+    return name->c_str();
+
+  return kEmptyTensorName;
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/CMakeLists.txt b/compiler/mio-circle04/CMakeLists.txt
new file mode 100644 (file)
index 0000000..8ee6da4
--- /dev/null
@@ -0,0 +1,52 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+  message(STATUS "mio-circle04 skip: FlatBuffers 2.0 NOT FOUND")
+  return()
+endif(NOT FlatBuffers_FOUND)
+
+message(STATUS "Build mio-circle04: TRUE")
+
+# TODO Find a better way
+# TODO use nnpackage
+# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs")
+set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.4/circle_schema.fbs")
+
+# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+  COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+  DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_circle04
+  OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle"
+  INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+  SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+  SCHEMA_FILES "schema.fbs"
+)
+
+# This example shows how to use "mio-circle04" library
+add_executable(mio_circle04_example example.cpp)
+target_link_libraries(mio_circle04_example mio_circle04)
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_circle04_helper STATIC ${SOURCES})
+set_target_properties(mio_circle04_helper PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(mio_circle04_helper PRIVATE src)
+target_include_directories(mio_circle04_helper PUBLIC include)
+target_link_libraries(mio_circle04_helper mio_circle04)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_circle04_helper_test ${TESTS})
+target_include_directories(mio_circle04_helper_test PRIVATE src)
+target_link_libraries(mio_circle04_helper_test mio_circle04)
+target_link_libraries(mio_circle04_helper_test mio_circle04_helper)
diff --git a/compiler/mio-circle04/README.md b/compiler/mio-circle04/README.md
new file mode 100644 (file)
index 0000000..d12dd78
--- /dev/null
@@ -0,0 +1,3 @@
+# mio-circle04
+
+Let's make it easy to read and write Circle models.
diff --git a/compiler/mio-circle04/example.cpp b/compiler/mio-circle04/example.cpp
new file mode 100644 (file)
index 0000000..1970f40
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-circle04"
+//
+#include <mio/circle/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+  std::ifstream ifs(argv[1], std::ios_base::binary);
+  std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cout << "Fail" << std::endl;
+    return 255;
+  }
+
+  std::cout << "Pass" << std::endl;
+  return 0;
+}
diff --git a/compiler/mio-circle04/include/mio_circle/Helper.h b/compiler/mio-circle04/include/mio_circle/Helper.h
new file mode 100644 (file)
index 0000000..d3ffc23
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_CIRCLE04_HELPER_H__
+#define __MIO_CIRCLE04_HELPER_H__
+
+#include <mio/circle/schema_generated.h>
+
+namespace mio
+{
+namespace circle
+{
+
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode);
+bool is_valid(const ::circle::OperatorCode *opcode);
+bool is_custom(const ::circle::OperatorCode *opcode);
+std::string opcode_name(const ::circle::OperatorCode *opcode);
+const char *tensor_type(const ::circle::Tensor *tensor);
+const char *tensor_name(const ::circle::Tensor *tensor);
+
+} // namespace circle
+} // namespace mio
+
+#endif // __MIO_CIRCLE04_HELPER_H__
diff --git a/compiler/mio-circle04/src/Helper.cpp b/compiler/mio-circle04/src/Helper.cpp
new file mode 100644 (file)
index 0000000..8b8737a
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <algorithm>
+#include <sstream>
+
+namespace mio
+{
+namespace circle
+{
+
+/**
+ * This will provide v3/v3a/v3b format neutral BuiltinOperator
+ * NOTE circle has minus value opcode (252~254 as uint8_t)
+ *      we cannot use std::max() like tflite as deprecated_builtin_code can be
+ *      minus and builtin_code being 0 for v0.3 files.
+ */
+::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  if (opcode->deprecated_builtin_code() == 127)
+  {
+    assert(opcode->builtin_code() >= 127);
+    return opcode->builtin_code();
+  }
+  // There was no 255(-1) value in v0.3
+  assert(opcode->deprecated_builtin_code() != -1);
+  return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code());
+}
+
+bool is_valid(const ::circle::OperatorCode *opcode)
+{
+  // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127
+  const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+  if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN)
+    return false;
+  // There was no 255(-1) value in v0.3
+  if (deprecated_builtin_code == -1)
+    return false;
+
+  const ::circle::BuiltinOperator builtin_code = opcode->builtin_code();
+  if (!(::circle::BuiltinOperator_MIN <= builtin_code &&
+        builtin_code <= ::circle::BuiltinOperator_MAX))
+    return false;
+
+  return true;
+}
+
+bool is_custom(const ::circle::OperatorCode *opcode)
+{
+  ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+  return (code == ::circle::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::circle::OperatorCode *opcode)
+{
+  assert(opcode);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid)";
+    return oss.str();
+  }
+
+  if (is_custom(opcode))
+  {
+    if (!opcode->custom_code())
+      return "(invalid custom)";
+
+    std::string custom_op = "CUSTOM(";
+    custom_op += opcode->custom_code()->c_str();
+    custom_op += ")";
+    return custom_op;
+  }
+
+  ::circle::BuiltinOperator code = builtin_code_neutral(opcode);
+  return ::circle::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::circle::Tensor *tensor)
+{
+  return ::circle::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::circle::Tensor *tensor)
+{
+  if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty())
+    return "(noname)";
+
+  return tensor->name()->c_str();
+}
+
+} // namespace circle
+} // namespace mio
diff --git a/compiler/mio-circle04/src/Helper.test.cpp b/compiler/mio-circle04/src/Helper.test.cpp
new file mode 100644 (file)
index 0000000..20fce08
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_circle/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_circle04_helper_test : public ::testing::Test
+{
+protected:
+  void initialization_finish(void)
+  {
+    _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+  }
+
+protected:
+  void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+                         circle::BuiltinOperator builtin_code)
+  {
+    _opcodes_vec.push_back(circle::CreateOperatorCodeDirect(
+      _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+  }
+
+  const circle::OperatorCode *get_operator_code(uint8_t idx)
+  {
+    return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+  }
+
+private:
+  flatbuffers::FlatBufferBuilder _fbb;
+  std::vector<flatbuffers::Offset<circle::OperatorCode>> _opcodes_vec;
+};
+
+TEST_F(mio_circle04_helper_test, v04)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CONV_2D = 3
+  add_operator_code(3, "", circle::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+            circle::BuiltinOperator_CONV_2D);
+  ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom_old)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CUSTOM = 32
+  add_operator_code(32, "custom", circle::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+            circle::BuiltinOperator_CUSTOM);
+  ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_NEG)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", circle::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_under127)
+{
+  // BuiltinOperator_CONV_2D = 3
+  add_operator_code(3, "", circle::BuiltinOperator_CONV_2D);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+            circle::BuiltinOperator_CONV_2D);
+  ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_under127_NEG)
+{
+  // BuiltinOperator_CONV_2D = 3
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", circle::BuiltinOperator_CONV_2D);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom)
+{
+  // BuiltinOperator_CUSTOM = 32
+  add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+            circle::BuiltinOperator_CUSTOM);
+  ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_custom_NEG)
+{
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_over127)
+{
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+  // BuiltinOperator_CUMSUM = 128
+  add_operator_code(127, "", circle::BuiltinOperator_CUMSUM);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)),
+            circle::BuiltinOperator_CUMSUM);
+  ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_circle04_helper_test, v04_over127_NEG)
+{
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", circle::BuiltinOperator_CUMSUM);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0)));
+}
index 4660e40039db845da605544bf3065beab7a32cf7..90187b037d37b3fedc29596975f1e1fa9ad49295 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)")
index 39f4d9a311565f8ac1062c856a6bc471d48d6b98..f2cfeafcc117d098e6340c19423736b745f56417 100644 (file)
@@ -1,7 +1,7 @@
-nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 
 if(NOT FlatBuffers_FOUND)
-  message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 1.12)")
+  message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 2.0)")
   return()
 endif(NOT FlatBuffers_FOUND)
 
@@ -47,3 +47,23 @@ endif(NOT TensorFlowGEMMLowpSource_FOUND)
 add_library(mio_tflite260_inc INTERFACE)
 target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
 target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite260_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite260_helper PRIVATE src)
+target_include_directories(mio_tflite260_helper PUBLIC include)
+target_link_libraries(mio_tflite260_helper mio_tflite260)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite260_helper_test ${TESTS})
+target_include_directories(mio_tflite260_helper_test PRIVATE src)
+target_link_libraries(mio_tflite260_helper_test mio_tflite260)
+target_link_libraries(mio_tflite260_helper_test mio_tflite260_helper)
diff --git a/compiler/mio-tflite260/include/mio_tflite260/Helper.h b/compiler/mio-tflite260/include/mio_tflite260/Helper.h
new file mode 100644 (file)
index 0000000..cb027e6
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_TFLITE260_HELPER_H__
+#define __MIO_TFLITE260_HELPER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+namespace mio
+{
+namespace tflite
+{
+
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+bool is_valid(const ::tflite::OperatorCode *opcode);
+bool is_custom(const ::tflite::OperatorCode *opcode);
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+const char *tensor_type(const ::tflite::Tensor *tensor);
+const char *tensor_name(const ::tflite::Tensor *tensor);
+
+} // namespace tflite
+} // namespace mio
+
+#endif // __MIO_TFLITE260_HELPER_H__
diff --git a/compiler/mio-tflite260/src/Helper.cpp b/compiler/mio-tflite260/src/Helper.cpp
new file mode 100644 (file)
index 0000000..9669058
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite260/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * This will provide v3/v3a format neutral BuiltinOperator
+ *
+ * This function referenced
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  return std::max(opcode->builtin_code(),
+                  static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+  // Valid Range : 0 <= deprecated_builtin_code <= 127
+  const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+  if (deprecated_builtin_code < 0)
+    return false;
+
+  const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+  if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+        builtin_code <= ::tflite::BuiltinOperator_MAX))
+    return false;
+
+  return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+  ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+  return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+  assert(opcode);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid)";
+    return oss.str();
+  }
+
+  if (is_custom(opcode))
+  {
+    if (!opcode->custom_code())
+      return "(invalid custom)";
+
+    std::string custom_op = "CUSTOM(";
+    custom_op += opcode->custom_code()->c_str();
+    custom_op += ")";
+    return custom_op;
+  }
+
+  ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+  return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+  return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+  static const char *kEmptyTensorName = "(noname)";
+
+  auto name = tensor->name();
+  if (name)
+    return name->c_str();
+
+  return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
diff --git a/compiler/mio-tflite260/src/Helper.test.cpp b/compiler/mio-tflite260/src/Helper.test.cpp
new file mode 100644 (file)
index 0000000..e1ef04c
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite260/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite260_helper_test : public ::testing::Test
+{
+protected:
+  void initialization_finish(void)
+  {
+    _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+  }
+
+protected:
+  void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+                         tflite::BuiltinOperator builtin_code)
+  {
+    _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+      _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+  }
+
+  const tflite::OperatorCode *get_operator_code(uint8_t idx)
+  {
+    return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+  }
+
+private:
+  flatbuffers::FlatBufferBuilder _fbb;
+  std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0(BuiltinOperator_ADD) in schame v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite260_helper_test, v3)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CONV_2D = 3
+  add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CONV_2D);
+  ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3_custom)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CUSTOM = 32
+  add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CUSTOM);
+  ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3_NEG)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_under127)
+{
+  // BuiltinOperator_CONV_2D = 3
+  add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CONV_2D);
+  ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_under127_NEG)
+{
+  // BuiltinOperator_CONV_2D = 3
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_custom)
+{
+  // BuiltinOperator_CUSTOM = 32
+  add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CUSTOM);
+  ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_custom_NEG)
+{
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_over127)
+{
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+  // BuiltinOperator_CUMSUM = 128
+  add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CUMSUM);
+  ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite260_helper_test, v3a_over127_NEG)
+{
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
diff --git a/compiler/mio-tflite280/CMakeLists.txt b/compiler/mio-tflite280/CMakeLists.txt
new file mode 100644 (file)
index 0000000..f48711e
--- /dev/null
@@ -0,0 +1,69 @@
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+  message(STATUS "Build mio-tflite280: FAILED (missing Flatbuffers 2.0)")
+  return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.8.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+  message(STATUS "Build mio-tflite280: FAILED (missing TensorFlowSource 2.8.0)")
+  return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite280: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use copy of schema.fbs as to provide unified way for circle also
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+  COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+  DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite280
+  OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+  INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+  SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+  SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite280_example example.cpp)
+target_link_libraries(mio_tflite280_example mio_tflite280)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite280_validate example.cpp)
+target_link_libraries(mio_tflite280_validate mio_tflite280)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.8.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+  return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite280_inc INTERFACE)
+target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite280_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_library(mio_tflite280_helper STATIC ${SOURCES})
+target_include_directories(mio_tflite280_helper PRIVATE src)
+target_include_directories(mio_tflite280_helper PUBLIC include)
+target_link_libraries(mio_tflite280_helper mio_tflite280)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(mio_tflite280_helper_test ${TESTS})
+target_include_directories(mio_tflite280_helper_test PRIVATE src)
+target_link_libraries(mio_tflite280_helper_test mio_tflite280)
+target_link_libraries(mio_tflite280_helper_test mio_tflite280_helper)
diff --git a/compiler/mio-tflite280/README.md b/compiler/mio-tflite280/README.md
new file mode 100644 (file)
index 0000000..73219a7
--- /dev/null
@@ -0,0 +1,3 @@
+# mio-tflite280
+
+_mio-tflite280_ provides a library to access TensorFlow lite model files with V2.8.0.
diff --git a/compiler/mio-tflite280/example.cpp b/compiler/mio-tflite280/example.cpp
new file mode 100644 (file)
index 0000000..83356b9
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite280"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+  std::ifstream ifs(argv[1], std::ios_base::binary);
+  std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+  if (!tflite::VerifyModelBuffer(verifier))
+  {
+    std::cout << "Fail" << std::endl;
+    return 255;
+  }
+
+  std::cout << "Pass" << std::endl;
+  return 0;
+}
diff --git a/compiler/mio-tflite280/include/mio_tflite280/Helper.h b/compiler/mio-tflite280/include/mio_tflite280/Helper.h
new file mode 100644 (file)
index 0000000..b0fb0ac
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __MIO_TFLITE280_HELPER_H__
+#define __MIO_TFLITE280_HELPER_H__
+
+#include <mio/tflite/schema_generated.h>
+
+namespace mio
+{
+namespace tflite
+{
+
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode);
+bool is_valid(const ::tflite::OperatorCode *opcode);
+bool is_custom(const ::tflite::OperatorCode *opcode);
+std::string opcode_name(const ::tflite::OperatorCode *opcode);
+const char *tensor_type(const ::tflite::Tensor *tensor);
+const char *tensor_name(const ::tflite::Tensor *tensor);
+
+} // namespace tflite
+} // namespace mio
+
+#endif // __MIO_TFLITE280_HELPER_H__
diff --git a/compiler/mio-tflite280/src/Helper.cpp b/compiler/mio-tflite280/src/Helper.cpp
new file mode 100644 (file)
index 0000000..ebf0bd1
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite280/Helper.h"
+
+#include <sstream>
+
+namespace mio
+{
+namespace tflite
+{
+
+/**
+ * This will provide v3/v3a format neutral BuiltinOperator
+ *
+ * This function referenced
+ * https://github.com/tensorflow/tensorflow/blob/7d12007d7800d3714a02e05059f3ea602d1aec78/tensorflow/lite/schema/schema_utils.cc
+ */
+::tflite::BuiltinOperator builtin_code_neutral(const ::tflite::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  return std::max(opcode->builtin_code(),
+                  static_cast<::tflite::BuiltinOperator>(opcode->deprecated_builtin_code()));
+}
+
+bool is_valid(const ::tflite::OperatorCode *opcode)
+{
+  // Valid Range : 0 <= deprecated_builtin_code <= 127
+  const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code();
+  if (deprecated_builtin_code < 0)
+    return false;
+
+  const ::tflite::BuiltinOperator builtin_code = opcode->builtin_code();
+  if (!(::tflite::BuiltinOperator_MIN <= builtin_code &&
+        builtin_code <= ::tflite::BuiltinOperator_MAX))
+    return false;
+
+  return true;
+}
+
+bool is_custom(const ::tflite::OperatorCode *opcode)
+{
+  ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+  return (code == ::tflite::BuiltinOperator_CUSTOM);
+}
+
+std::string opcode_name(const ::tflite::OperatorCode *opcode)
+{
+  assert(opcode);
+
+  if (!is_valid(opcode))
+  {
+    std::ostringstream oss;
+    oss << "(invalid)";
+    return oss.str();
+  }
+
+  if (is_custom(opcode))
+  {
+    if (!opcode->custom_code())
+      return "(invalid custom)";
+
+    std::string custom_op = "CUSTOM(";
+    custom_op += opcode->custom_code()->c_str();
+    custom_op += ")";
+    return custom_op;
+  }
+
+  ::tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+  return ::tflite::EnumNameBuiltinOperator(code);
+}
+
+const char *tensor_type(const ::tflite::Tensor *tensor)
+{
+  return ::tflite::EnumNameTensorType(tensor->type());
+}
+
+const char *tensor_name(const ::tflite::Tensor *tensor)
+{
+  static const char *kEmptyTensorName = "(noname)";
+
+  auto name = tensor->name();
+  if (name)
+    return name->c_str();
+
+  return kEmptyTensorName;
+}
+
+} // namespace tflite
+} // namespace mio
diff --git a/compiler/mio-tflite280/src/Helper.test.cpp b/compiler/mio-tflite280/src/Helper.test.cpp
new file mode 100644 (file)
index 0000000..df573bf
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mio_tflite280/Helper.h"
+
+#include <flatbuffers/flatbuffers.h>
+#include <gtest/gtest.h>
+
+#include <vector>
+
+class mio_tflite280_helper_test : public ::testing::Test
+{
+protected:
+  void initialization_finish(void)
+  {
+    _fbb.Finish(tflite::CreateModelDirect(_fbb, 0, &_opcodes_vec));
+  }
+
+protected:
+  void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code,
+                         tflite::BuiltinOperator builtin_code)
+  {
+    _opcodes_vec.push_back(tflite::CreateOperatorCodeDirect(
+      _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code));
+  }
+
+  const tflite::OperatorCode *get_operator_code(uint8_t idx)
+  {
+    return tflite::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx);
+  }
+
+private:
+  flatbuffers::FlatBufferBuilder _fbb;
+  std::vector<flatbuffers::Offset<tflite::OperatorCode>> _opcodes_vec;
+};
+
+/**
+ * Extended 'builtin_code' is not in TFLite schema v3.
+ *
+ * Thus it is filled with 0(BuiltinOperator_ADD) in schema v3. Please refer to
+ * https://github.com/tensorflow/tensorflow/blob/1ab788fa8d08430be239ab970980b891ad7af494/tensorflow/lite/schema/schema_utils.cc#L28-L31
+ */
+TEST_F(mio_tflite280_helper_test, v3)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CONV_2D = 3
+  add_operator_code(3, "", tflite::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CONV_2D);
+  ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3_custom)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CUSTOM = 32
+  add_operator_code(32, "custom", tflite::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CUSTOM);
+  ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3_NEG)
+{
+  // BuiltinOperator_ADD = 0
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", tflite::BuiltinOperator_ADD);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_under127)
+{
+  // BuiltinOperator_CONV_2D = 3
+  add_operator_code(3, "", tflite::BuiltinOperator_CONV_2D);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CONV_2D);
+  ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_under127_NEG)
+{
+  // BuiltinOperator_CONV_2D = 3
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", tflite::BuiltinOperator_CONV_2D);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_custom)
+{
+  // BuiltinOperator_CUSTOM = 32
+  add_operator_code(32, "custom", tflite::BuiltinOperator_CUSTOM);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CUSTOM);
+  ASSERT_TRUE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_custom_NEG)
+{
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "custom", tflite::BuiltinOperator_CUSTOM);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_over127)
+{
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127
+  // BuiltinOperator_CUMSUM = 128
+  add_operator_code(127, "", tflite::BuiltinOperator_CUMSUM);
+  initialization_finish();
+
+  ASSERT_TRUE(mio::tflite::is_valid(get_operator_code(0)));
+  ASSERT_EQ(mio::tflite::builtin_code_neutral(get_operator_code(0)),
+            tflite::BuiltinOperator_CUMSUM);
+  ASSERT_FALSE(mio::tflite::is_custom(get_operator_code(0)));
+}
+
+TEST_F(mio_tflite280_helper_test, v3a_over127_NEG)
+{
+  // BuiltinOperator_CUMSUM = 128
+  // deprecated_builtin_code cannot be negative value
+  add_operator_code(128, "", tflite::BuiltinOperator_CUMSUM);
+  initialization_finish();
+
+  ASSERT_FALSE(mio::tflite::is_valid(get_operator_code(0)));
+}
index e6eb13b93eb85a7a428abf93907c2a86d6d52c3e..04c22055e4b9f18fb706eb368789c1c2f7706236 100644 (file)
@@ -112,6 +112,10 @@ target_include_directories(mir_onnx_importer PUBLIC ../../include/mir_onnx_impor
 target_include_directories(mir_onnx_importer PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
 target_link_libraries(mir_onnx_importer PUBLIC mir mir_onnx_proto PRIVATE mir_interpreter nncc_common)
 
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
 nnas_find_package(GTest REQUIRED)
 
 file(GLOB_RECURSE TEST_SOURCES "*.test.cpp")
index 42eb4f8a58149f39fd20b8ce58e31e7db76f4f7c..6c6c28a32de5aa2a1a177bd132358200849a46e7 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers EXACT 1.10 REQUIRED)
+nnas_find_package(FlatBuffers EXACT 2.0 REQUIRED)
 
 if (NOT FlatBuffers_FOUND)
     return()
index a8a096ef4d2a43df59447582050d3aca6381d4c9..217f1bd151b0eb2ea9e0751418977f16988071cf 100644 (file)
@@ -8,11 +8,11 @@ target_include_directories(mir2loco PUBLIC include)
 target_link_libraries(mir2loco PUBLIC mir)
 target_link_libraries(mir2loco PUBLIC loco)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 GTest_AddTest(mir2loco_test ${TESTS})
 target_link_libraries(mir2loco_test mir2loco)
index 7c42761ba7132f57f0826260a5a30f158800251d..95669264f83f10c0d52465789bcd6c96c89ba10a 100644 (file)
@@ -26,6 +26,7 @@ target_link_libraries(moco_tf_frontend PRIVATE locomotiv)
 target_link_libraries(moco_tf_frontend PRIVATE plier_tf)
 target_link_libraries(moco_tf_frontend PRIVATE locoex_customop)
 target_link_libraries(moco_tf_frontend PRIVATE logo)
+target_link_libraries(moco_tf_frontend PRIVATE logo_ex)
 target_link_libraries(moco_tf_frontend PRIVATE oops)
 install(TARGETS moco_tf_frontend DESTINATION lib)
 
@@ -46,4 +47,5 @@ target_link_libraries(moco_tf_frontend_test moco_tf_frontend)
 target_link_libraries(moco_tf_frontend_test plier_tf)
 target_link_libraries(moco_tf_frontend_test locoex_customop)
 target_link_libraries(moco_tf_frontend_test logo)
+target_link_libraries(moco_tf_frontend_test logo_ex)
 add_test(moco_tf_frontend_test moco_tf_frontend_test)
index 90590e37430dc5a0b79b75caa6c40dff3a45fd82..71755556c4acdb99089868b4a0b28da9a66da5fc 100644 (file)
@@ -9,5 +9,6 @@ require("mio-tf")
 require("plier-tf")
 require("locoex-customop")
 require("logo")
+require("logo-ex")
 require("oops")
 require("bino")
index f14b816757c9e429bc17eff3ca38077eef17603f..a197a796e2698ceacc779f6f600913d9389ebce2 100644 (file)
@@ -21,6 +21,7 @@
 #include "Transforms/TypeInferencePass.h"
 
 #include <logo/Passes.h>
+#include <logo/PassesEx.h>
 #include <moco/Pass/Passes.h>
 
 #endif // __MOCO_TF_TRANSFORMS_H__
index ec7da8d30b34b08e1638dedec2c42e67a0faefc6..5a5ae2623c4524a389faaddc7499dff2a1f837c5 100644 (file)
@@ -8,11 +8,11 @@ target_include_directories(morph PUBLIC include)
 target_link_libraries(morph PRIVATE nncc_common)
 target_link_libraries(morph PUBLIC angkor)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 add_executable(morph_test ${TESTS})
 target_link_libraries(morph_test morph)
index b603f9ae9e39b20675bfd3873ea7d7bfd574694f..4f17db3b4dcb17839674ffbadffaf90abe85182f 100644 (file)
@@ -15,11 +15,11 @@ foreach(EXAMPLE_FILE IN ITEMS ${EXAMPLE_FILES})
    target_link_libraries(${TARGET_NAME} nest_core)
 endforeach(EXAMPLE_FILE)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 add_executable(nest_core_test ${TESTS})
 target_link_libraries(nest_core_test gtest_main)
index 737c73b8fc9e71d88326e03f37a118f1b91bba74..6bd3199e35616fcdb16792e4cc4bcb762ded930e 100644 (file)
@@ -5,11 +5,11 @@ list(REMOVE_ITEM SOURCES ${TESTS})
 add_library(nike STATIC ${SOURCES})
 target_include_directories(nike PUBLIC include)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 GTest_AddTest(nike_test ${TESTS})
 target_link_libraries(nike_test nike)
index d38385e9190d33ce718491b197569ce779bc37af..c2135c4be35a22cd6fb258e2a01f02c536ed3f06 100644 (file)
@@ -22,6 +22,8 @@
 
 #include <gtest/gtest.h>
 
+#include <algorithm>
+
 using namespace std;
 using namespace nnc;
 using namespace mir;
index 82c0e3a867b2df95864bc81669b4c74374f6ddcb..d2c8af26d5d2e46dbcc13a8b7b59be4d5eac5d01 100644 (file)
@@ -2,11 +2,11 @@ add_library(nnop INTERFACE)
 target_include_directories(nnop INTERFACE include)
 target_link_libraries(nnop INTERFACE angkor)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 
index 729bfa80ad0790d673feb0f2088396d0b5447f49..8732340ae289e6def6ee2fcbd07c7bb4256c0cd0 100644 (file)
@@ -14,6 +14,11 @@ set(ONE_COMMAND_FILES
     onecc
 )
 
+# pytorch importer is an experimental feature, it is not used in default configuration
+if(ENABLE_ONE_IMPORT_PYTORCH)
+  list(APPEND ONE_COMMAND_FILES one-import-pytorch)
+endif(ENABLE_ONE_IMPORT_PYTORCH)
+
 foreach(ONE_COMMAND IN ITEMS ${ONE_COMMAND_FILES})
 
   set(ONE_COMMAND_FILE ${ONE_COMMAND})
@@ -41,6 +46,7 @@ set(ONE_UTILITY_FILES
     one-build.template.cfg
     onecc.template.cfg
     utils.py
+    onnx_legalizer.py
 )
 
 foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
@@ -66,6 +72,39 @@ foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
 
 endforeach(ONE_UTILITY)
 
+# make python directory
+set(ONE_PYTHON_FILES constant.py
+                     make_cmd.py)
+
+foreach(ONE_PYTHON_FILE IN ITEMS ${ONE_PYTHON_FILES})
+
+  set(ONE_PYTHON_DIR "onelib")
+  set(ONE_PYTHON_DIR_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}")
+  set(ONE_PYTHON_FILE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}")
+  set(ONE_PYTHON_FILE_BIN "${CMAKE_CURRENT_BINARY_DIR}/${ONE_PYTHON_DIR}/${ONE_PYTHON_FILE}")
+  set(ONE_PYTHON_TARGET "${ONE_PYTHON_FILE}_target")
+
+  add_custom_command(OUTPUT ${ONE_PYTHON_DIR_BIN}
+    COMMAND ${CMAKE_COMMAND} -E make_directory "${ONE_PYTHON_DIR_BIN}"
+    COMMENT "Generate ${ONE_PYTHON_DIR_BIN}"
+  )
+
+  add_custom_command(OUTPUT ${ONE_PYTHON_FILE_BIN}
+    COMMAND ${CMAKE_COMMAND} -E copy "${ONE_PYTHON_FILE_SRC}" "${ONE_PYTHON_FILE_BIN}"
+    DEPENDS ${ONE_PYTHON_SRC}
+    COMMENT "Generate ${ONE_PYTHON_FILE_BIN}"
+  )
+
+  add_custom_target(${ONE_PYTHON_TARGET} ALL DEPENDS ${ONE_PYTHON_DIR_BIN} ${ONE_PYTHON_FILE_BIN})
+
+  install(DIRECTORY ${ONE_PYTHON_DIR}
+          FILE_PERMISSIONS OWNER_WRITE OWNER_READ
+                           GROUP_READ
+                           WORLD_READ
+          DESTINATION bin)
+
+endforeach(ONE_PYTHON_FILE)
+
 set(ONE_DOCUMENT_FILES
     how-to-use-one-commands.txt
     how-to-prepare-virtualenv.txt
index 6d846c0813a71e25c894c05182e420abab6167a2..8d6007f38a12aef7aef47057771fcd94924054af 100644 (file)
@@ -5,7 +5,7 @@ Last update: 2020-09-15
 
 This document explains about 'one-prepare-venv' command.
 
-'one-prepare-venv' will prepare python3 virtual environment with tensorflow-cpu
+'one-prepare-venv' will prepare python3.8 virtual environment with tensorflow-cpu
 version 2.3.0, recommanded 2.x version as of now, so that 'one-import-tf'
 command can execute properly.
 
@@ -20,7 +20,7 @@ Please install these required packages before venv preparation.
 
 $ sudo apt-get update
 $ sudo apt-get upgrade
-$ sudo apt-get install python3-pip python3-venv
+$ sudo apt-get install python3.8 python3-pip python3.8-venv
 
 
 How to run for Ubuntu
@@ -36,18 +36,9 @@ There will be venv folder as of result.
 How to run for Windows
 ----------------------
 
-1. First, please prepare Python 3.5-3.7
-2. Open the Command Prompt as an administrator
-3. cd(change directory) to the directory where one-compiler is installed
-4. run below command
-```
-$ ONE\install\bin> python -m venv venv
-$ ONE\install\bin> cd venv/Scripts
-$ ONE\install\bin\venv/Scripts> pip.exe install -U pip
-$ ONE\install\bin\venv/Scripts> pip.exe install -U tensorflow-cpu==2.3.0
-```
-
-After running the above command, go back to MinGW and run one-compiler.
+Support for Windows is not maintained for now.
+If you have any needs for running in Windows, please fire an issue.
+Or you can use Docker for Windows.
 
 
 Trouble shooting
index 0a0c4b14ce9c0ae3ae50c752f82955ad4848d9f5..ebc1651676e827859af1d45aec65ec30b3559137 100644 (file)
@@ -155,6 +155,7 @@ Current transformation options are
 - fold_cast : This removes Cast operation which can be folded
 - fold_dequantize : This removes Dequantize operation which can be folded
 - fold_dwconv : This folds Depthwise Convolution operation which can be folded
+- fold_gather : This removes Gather operation which can be folded
 - fold_sparse_to_dense : This removes SparseToDense operation which can be folded
 - forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition
 - fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
@@ -178,6 +179,7 @@ Current transformation options are
 - generate_profile_data : This will turn on profiling data generation.
 - remove_fakequant : This will remove all fakequant operators.
 - remove_quantdequant : This will remove all Quantize-Dequantize sequence.
+- remove_redundant_quantize : This removes redundant quantize operators.
 - remove_redundant_reshape : This fuses or removes redundant reshape operators.
 - remove_redundant_transpose : This fuses or removes redundant transpose operators.
 - remove_unnecessary_reshape : This removes unnecessary reshape operators.
index 90dfa77b8a0a6a5c6f1c6c5e54be7058b151255a..5c313b44b4308dc2832ae927ccd998d2b5550f81 100644 (file)
@@ -154,25 +154,31 @@ def main():
     config = _parse_cfg(args)
 
     # verify configuration file
-    drivers = [
-        'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx',
-        'one-optimize', 'one-quantize', 'one-pack', 'one-codegen'
+    bin_dir = os.path.dirname(os.path.realpath(__file__))
+    import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
+    transform_drivers = [
+        'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
     ]
-    _verify_cfg(drivers, config)
+    _verify_cfg(import_drivers_dict, config)
 
     # verify optimization option file
     _verify_opt(args)
 
     # get sections to run
     section_to_run = []
-    for d in drivers:
+    for d in list(import_drivers_dict) + transform_drivers:
         if _is_available_driver(config, d):
             section_to_run.append(d)
 
     # run
     dir_path = os.path.dirname(os.path.realpath(__file__))
     for section in section_to_run:
-        driver_path = os.path.join(dir_path, _get_driver_name(section))
+        if section in import_drivers_dict:
+            # we already have the driver name in the dict
+            driver_name = import_drivers_dict[section]
+        else:
+            driver_name = _get_driver_name(section)
+        driver_path = os.path.join(dir_path, driver_name)
         cmd = [driver_path, '--config', getattr(args, 'config'), '--section', section]
         if section == 'one-optimize' and _utils._is_valid_attr(args, 'O'):
             cmd += ['-O', getattr(args, 'O')]
index 9aef6270e94069a16e1755980200508b4e5cf96a..ef89a9297b51f4bbf301d61575cb9bc9af85009c 100644 (file)
@@ -25,6 +25,7 @@ import subprocess
 import sys
 import tempfile
 
+import onelib.make_cmd as _make_cmd
 import utils as _utils
 import generate_bcq_output_arrays as _bcq_info_gen
 
@@ -32,6 +33,10 @@ import generate_bcq_output_arrays as _bcq_info_gen
 sys.tracebacklimit = 0
 
 
+def get_driver_cfg_section():
+    return "one-import-bcq"
+
+
 def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert TensorFlow with BCQ to circle')
@@ -155,7 +160,7 @@ def _convert(args):
             tmpdir,
             os.path.splitext(
                 os.path.basename(generate_bcq_metadata_output_path))[0]) + '.tflite'
-        tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
+        tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
                                                        generate_bcq_metadata_output_path,
                                                        tf2tfliteV2_output_path)
         try:
@@ -171,7 +176,7 @@ def _convert(args):
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
-        tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+        tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
                                                            tf2tfliteV2_output_path,
                                                            getattr(args, 'output_path'))
 
index 1c0c5498ef3088592d88c0bc0a017fb17de20585..eaa1361979d8635e36b491a5a5835d73bcc2539c 100644 (file)
@@ -27,12 +27,25 @@ import tempfile
 import onnx
 import onnx_tf
 
+# ONNX legalizer is an optional feature
+# It enables conversion of some operations, but in experimental phase for now
+try:
+    import onnx_legalizer
+    _onnx_legalizer_enabled = True
+except ImportError:
+    _onnx_legalizer_enabled = False
+
+import onelib.make_cmd as _make_cmd
 import utils as _utils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
 
 
+def get_driver_cfg_section():
+    return "one-import-onnx"
+
+
 def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert ONNX to circle')
@@ -64,6 +77,10 @@ def _get_parser():
     tf2tfliteV2_group.add_argument('--model_format', default='saved_model')
     tf2tfliteV2_group.add_argument('--converter_version', default='v2')
 
+    parser.add_argument('--unroll_rnn', action='store_true', help='Unroll RNN operators')
+    parser.add_argument(
+        '--unroll_lstm', action='store_true', help='Unroll LSTM operators')
+
     # save intermediate file(s)
     parser.add_argument(
         '--save_intermediate',
@@ -120,6 +137,11 @@ def _convert(args):
             tmpdir = os.path.dirname(logfile_path)
         # convert onnx to tf saved model
         onnx_model = onnx.load(getattr(args, 'input_path'))
+        if _onnx_legalizer_enabled:
+            options = onnx_legalizer.LegalizeOptions
+            options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
+            options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
+            onnx_legalizer.legalize(onnx_model, options)
         tf_savedmodel = onnx_tf.backend.prepare(onnx_model)
 
         savedmodel_name = os.path.splitext(os.path.basename(
@@ -133,7 +155,7 @@ def _convert(args):
             args.output_path))[0] + '.tflite'
         tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)
 
-        tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(
+        tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
             args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)
 
         f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())
@@ -143,7 +165,7 @@ def _convert(args):
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
-        tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+        tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
                                                            tf2tfliteV2_output_path,
                                                            getattr(args, 'output_path'))
 
diff --git a/compiler/one-cmds/one-import-pytorch b/compiler/one-cmds/one-import-pytorch
new file mode 100644 (file)
index 0000000..dbf1ba6
--- /dev/null
@@ -0,0 +1,366 @@
+#!/usr/bin/env bash
+''''export SCRIPT_PATH="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)" # '''
+''''export PY_PATH=${SCRIPT_PATH}/venv/bin/python                                       # '''
+''''test -f ${PY_PATH} && exec ${PY_PATH} "$0" "$@"                                     # '''
+''''echo "Error: Virtual environment not found. Please run 'one-prepare-venv' command." # '''
+''''exit 255                                                                            # '''
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import importlib
+import inspect
+import os
+import sys
+import tempfile
+import torch
+import onnx
+import onnx_tf
+import json
+import zipfile
+
+import onnx_legalizer
+import onelib.make_cmd as _make_cmd
+import utils as _utils
+
+# TODO Find better way to suppress traceback on error
+sys.tracebacklimit = 0
+
+
def get_driver_spec():
    """Return the (driver name, driver type) pair used by driver discovery."""
    driver_name = "one-import-pytorch"
    return (driver_name, _utils.DriverType.IMPORTER)
+
+
def _get_parser():
    """Build the argument parser for the PyTorch-to-Circle conversion tool."""
    arg_parser = argparse.ArgumentParser(
        description='command line tool to convert PyTorch to Circle')
    _utils._add_default_arg(arg_parser)

    # converter options: model locations plus the input specification
    conv_args = arg_parser.add_argument_group('converter arguments')
    conv_args.add_argument(
        '-i', '--input_path', type=str, help='full filepath of the input file')
    conv_args.add_argument(
        '-p', '--python_path', type=str, help='full filepath of the python model file')
    conv_args.add_argument(
        '-o', '--output_path', type=str, help='full filepath of the output file')
    conv_args.add_argument(
        '-s',
        '--input_shapes',
        type=str,
        help=
        'Shapes corresponding to --input_arrays, colon-separated.(ex:\"1,4,4,3:1,20,20,3\")'
    )
    conv_args.add_argument(
        '-t',
        '--input_types',
        type=str,
        help='data types of input tensors, colon-separated (ex: float32, uint8, int32)')

    # options forwarded to tf2tfliteV2; fixed values for the pytorch flow
    tf2tflite_args = arg_parser.add_argument_group('tf2tfliteV2 arguments')
    tf2tflite_args.add_argument('--model_format', default='saved_model')
    tf2tflite_args.add_argument('--converter_version', default='v2')

    # onnx legalizer knobs
    arg_parser.add_argument(
        '--unroll_rnn', action='store_true', help='Unroll RNN operators')
    arg_parser.add_argument(
        '--unroll_lstm', action='store_true', help='Unroll LSTM operators')

    # keep intermediate artifacts next to the output instead of a temp dir
    arg_parser.add_argument(
        '--save_intermediate',
        action='store_true',
        help='Save intermediate files to output folder')

    return arg_parser
+
+
def _verify_arg(parser, args):
    """Validate parsed arguments; exit via parser.error() if any required one is absent."""
    required = (
        ('input_path', '-i/--input_path'),
        ('output_path', '-o/--output_path'),
        ('input_shapes', '-s/--input_shapes'),
        ('input_types', '-t/--input_types'),
    )
    missing = [flag for attr, flag in required if not _utils._is_valid_attr(args, attr)]
    if missing:
        parser.error('the following arguments are required: ' + ' '.join(missing))
+
+
def _parse_arg(parser):
    """Parse command-line arguments, handling --version before anything else."""
    parsed = parser.parse_args()
    if parsed.version:
        # prints the version string and terminates the process
        _utils._print_version_and_exit(__file__)
    return parsed
+
+
+def _apply_verbosity(verbosity):
+    # NOTE
+    # TF_CPP_MIN_LOG_LEVEL
+    #   0 : INFO + WARNING + ERROR + FATAL
+    #   1 : WARNING + ERROR + FATAL
+    #   2 : ERROR + FATAL
+    #   3 : FATAL
+    if verbosity:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
+    else:
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+
+def _parse_shapes(shapes_str):
+    shapes = []
+    for shape_str in shapes_str.split(":"):
+        if shape_str != "":
+            shapes += [list(map(int, shape_str.split(",")))]
+        else:
+            shapes += [[]]
+    return shapes
+
+
+def _parse_types(types_str):
+    # There are no convenient way to create torch from string ot numpy dtype, so using this workaround
+    dtype_dict = {
+        "bool": torch.bool,
+        "uint8": torch.uint8,
+        "int8": torch.int8,
+        "int16": torch.int16,
+        "int32": torch.int32,
+        "int64": torch.int64,
+        "float16": torch.float16,
+        "float32": torch.float32,
+        "float64": torch.float64,
+        "complex64": torch.complex64,
+        "complex128": torch.complex128
+    }
+    array = types_str.split(",")
+    types = [dtype_dict[type_str.strip()] for type_str in array]
+    return types
+
+
+# merge contents of module into global namespace
+def _merge_module(module):
+    # is there an __all__?  if so respect it
+    if "__all__" in module.__dict__:
+        names = module.__dict__["__all__"]
+    else:
+        # otherwise we import all names that don't begin with _
+        names = [x for x in module.__dict__ if not x.startswith("_")]
+    globals().update({k: getattr(module, k) for k in names})
+
+
+def _list_classes_from_module(module):
+    # Parsing the module to get all defined classes
+    is_member = lambda member: inspect.isclass(member) and member.__module__ == module.__name__
+    classes = [cls[1] for cls in inspect.getmembers(module, is_member)]
+    return classes
+
+
def _extract_pytorch_model(log_file, parameters_path, python_path):
    """Load a pytorch model whose class is defined in a separate python file.

    log_file: binary stream for progress logging.
    parameters_path: file produced by torch.save — either a state_dict or a
        pickled "entire" model; the state_dict interpretation is tried first.
    python_path: python source file defining the model class.

    Raises ValueError when the python file cannot be executed or does not
    contain exactly one class definition (state_dict branch).
    """
    log_file.write(('Trying to load saved model\n').encode())
    python_model_path = os.path.abspath(python_path)
    module_name = os.path.basename(python_model_path)
    module_dir = os.path.dirname(python_model_path)
    # make sibling files of the model definition importable by it
    sys.path.append(module_dir)
    log_file.write(('Trying to load given python module\n').encode())
    module_loader = importlib.machinery.SourceFileLoader(module_name, python_model_path)
    module_spec = importlib.util.spec_from_loader(module_name, module_loader)
    python_model_module = importlib.util.module_from_spec(module_spec)

    try:
        module_loader.exec_module(python_model_module)
    except Exception as e:
        # keep the handler narrow: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit; chain the cause for debugging
        raise ValueError('Failed to execute given python model file') from e

    log_file.write(('Model python module is loaded\n').encode())
    try:
        # this branch assumes this parameters_path contains state_dict
        state_dict = torch.load(parameters_path)
        log_file.write(('Trying to find model class and fill it`s state dict\n').encode())
        model_class_definitions = _list_classes_from_module(python_model_module)
        if len(model_class_definitions) != 1:
            raise ValueError("Expected only one class as model definition. {}".format(
                model_class_definitions))
        pytorch_model_class = model_class_definitions[0]
        model = pytorch_model_class()
        model.load_state_dict(state_dict)
        return model
    except Exception:
        # fallback: this branch assumes parameters_path contains an "entire"
        # pickled model, which needs the class merged into __main__ to unpickle
        _merge_module(python_model_module)
        log_file.write(('Model python module is merged into main environment\n').encode())
        model = torch.load(parameters_path)
        log_file.write(('Pytorch model loaded\n').encode())
        return model
+
+
+def _extract_torchscript_model(log_file, input_path):
+    # assuming this is a pytorch script
+    log_file.write(('Trying to load TorchScript model\n').encode())
+    try:
+        pytorch_model = torch.jit.load(input_path)
+        return pytorch_model
+    except RuntimeError as e:
+        log_file.write((str(e) + '\n').encode())
+        log_file.write(
+            'Failed to import input file. Maybe this it contains only weights? Try pass "python_path" argument\n'.
+            encode())
+        raise
+    log_file.write(('TorchScript model is loaded\n').encode())
+
+
def _extract_mar_model(log_file, tmpdir, input_path):
    """Unpack a torchserve .mar archive and load the model it contains.

    MAR-INF/MANIFEST.json names the serialized weights ('serializedFile')
    and, optionally, the python file defining the model ('modelFile').
    """
    extracted_dir = os.path.join(tmpdir, 'mar')
    with zipfile.ZipFile(input_path) as archive:
        archive.extractall(path=extracted_dir)
    with open(os.path.join(extracted_dir, 'MAR-INF/MANIFEST.json')) as manifest_file:
        manifest = json.load(manifest_file)
    model_info = manifest['model']
    serialized_file = os.path.join(extracted_dir, model_info['serializedFile'])
    if 'modelFile' not in model_info:
        # no class definition shipped: must be a TorchScript archive
        return _extract_torchscript_model(log_file, serialized_file)
    model_file = os.path.join(extracted_dir, model_info['modelFile'])
    return _extract_pytorch_model(log_file, serialized_file, model_file)
+
+
def _convert(args):
    """Convert a pytorch model (TorchScript / state_dict / .mar) to circle.

    Pipeline: load model -> torch.onnx.export -> onnx legalizer ->
    onnx-tf saved model -> tf2tfliteV2 (tflite) -> tflite2circle.
    Every step is logged to <output_path>.log; intermediate files live in a
    temporary directory unless --save_intermediate is given.
    """
    _apply_verbosity(args.verbose)

    # get file path to log
    dir_path = os.path.dirname(os.path.realpath(__file__))
    logfile_path = os.path.realpath(args.output_path) + '.log'
    with open(logfile_path, 'wb') as f, tempfile.TemporaryDirectory() as tmpdir:
        # save intermediate files next to the log instead of the temp dir
        if _utils._is_valid_attr(args, 'save_intermediate'):
            tmpdir = os.path.dirname(logfile_path)
        # convert pytorch to onnx model
        input_path = getattr(args, 'input_path')
        model_file = getattr(args, 'python_path')

        if input_path[-4:] == '.mar':
            pytorch_model = _extract_mar_model(f, tmpdir, input_path)
        elif model_file is None:
            pytorch_model = _extract_torchscript_model(f, input_path)
        else:
            pytorch_model = _extract_pytorch_model(f, input_path, model_file)

        input_shapes = _parse_shapes(getattr(args, 'input_shapes'))
        input_types = _parse_types(getattr(args, 'input_types'))

        if len(input_shapes) != len(input_types):
            raise ValueError('number of input shapes and input types must be equal')

        sample_inputs = [
            torch.ones(shape, dtype=dtype)
            for shape, dtype in zip(input_shapes, input_types)
        ]

        # run the model once: torch.onnx.export wants example outputs
        f.write(('Trying to inference loaded model\n').encode())  # BUGFIX: '\n' was missing
        sample_outputs = pytorch_model(*sample_inputs)
        f.write(('Acquired sample outputs\n').encode())

        onnx_output_name = os.path.splitext(os.path.basename(
            args.output_path))[0] + '.onnx'
        onnx_output_path = os.path.join(tmpdir, onnx_output_name)

        onnx_saved = False
        # some operations are not supported in early opset versions, try several
        for onnx_opset_version in range(9, 15):
            f.write(('Trying to save onnx model using opset version ' +
                     str(onnx_opset_version) + '\n').encode())
            try:
                torch.onnx.export(
                    pytorch_model,
                    tuple(sample_inputs),
                    onnx_output_path,
                    example_outputs=sample_outputs,
                    opset_version=onnx_opset_version)
                onnx_saved = True
                break
            except Exception:
                # best effort over opsets; narrowed from a bare `except:` so
                # KeyboardInterrupt/SystemExit are not swallowed
                f.write(('attempt failed\n').encode())

        if not onnx_saved:
            raise ValueError('Failed to save temporary onnx model')

        # convert onnx to tf saved model
        onnx_model = onnx.load(onnx_output_path)

        options = onnx_legalizer.LegalizeOptions()
        options.unroll_rnn = _utils._is_valid_attr(args, 'unroll_rnn')
        options.unroll_lstm = _utils._is_valid_attr(args, 'unroll_lstm')
        onnx_legalizer.legalize(onnx_model, options)

        tf_savedmodel = onnx_tf.backend.prepare(onnx_model)

        savedmodel_name = os.path.splitext(os.path.basename(
            args.output_path))[0] + '.savedmodel'
        savedmodel_output_path = os.path.join(tmpdir, savedmodel_name)
        tf_savedmodel.export_graph(savedmodel_output_path)

        # make a command to convert from tf to tflite
        tf2tfliteV2_path = os.path.join(dir_path, 'tf2tfliteV2.py')
        tf2tfliteV2_output_name = os.path.splitext(os.path.basename(
            args.output_path))[0] + '.tflite'
        tf2tfliteV2_output_path = os.path.join(tmpdir, tf2tfliteV2_output_name)

        # NOTE(review): presumably dropped so make_tf2tfliteV2_cmd does not
        # forward the pytorch-style shapes to tf2tfliteV2 — confirm
        del args.input_shapes
        tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(
            args, tf2tfliteV2_path, savedmodel_output_path, tf2tfliteV2_output_path)

        f.write((' '.join(tf2tfliteV2_cmd) + '\n').encode())

        # convert tf to tflite
        _utils._run(tf2tfliteV2_cmd, logfile=f)

        # make a command to convert from tflite to circle
        tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
        tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(
            tflite2circle_path, tf2tfliteV2_output_path, getattr(args, 'output_path'))

        f.write((' '.join(tflite2circle_cmd) + '\n').encode())

        # convert tflite to circle
        _utils._run(tflite2circle_cmd, err_prefix="tflite2circle", logfile=f)
+
+
def main():
    """Entry point: parse CLI args and config file, validate, then convert."""
    arg_parser = _get_parser()
    args = _parse_arg(arg_parser)

    # the configuration file may supply options not given on the command line
    _utils._parse_cfg(args, 'one-import-pytorch')

    _verify_arg(arg_parser, args)

    _convert(args)
+
+
+if __name__ == '__main__':
+    _utils._safemain(main, __file__)
index e2294caa6c494357a12172b79ee722f11078b128..999255a34ac0c838f1820eb7993631a06428be29 100644 (file)
@@ -25,9 +25,14 @@ import subprocess
 import sys
 import tempfile
 
+import onelib.make_cmd as _make_cmd
 import utils as _utils
 
 
+def get_driver_cfg_section():
+    return "one-import-tf"
+
+
 def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert TensorFlow to circle')
@@ -146,7 +151,7 @@ def _convert(args):
         tf2tfliteV2_output_path = os.path.join(
             tmpdir,
             os.path.splitext(os.path.basename(args.output_path))[0]) + '.tflite'
-        tf2tfliteV2_cmd = _utils._make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
+        tf2tfliteV2_cmd = _make_cmd.make_tf2tfliteV2_cmd(args, tf2tfliteV2_path,
                                                        getattr(args, 'input_path'),
                                                        tf2tfliteV2_output_path)
 
@@ -157,7 +162,7 @@ def _convert(args):
 
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
-        tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+        tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
                                                            tf2tfliteV2_output_path,
                                                            getattr(args, 'output_path'))
 
index 7eee0484ab2d3816ea8a0ffe8cb6dc28a372e4b7..2d756bff692d386be2984019ec9a660866050cd6 100644 (file)
@@ -24,12 +24,17 @@ import os
 import subprocess
 import sys
 
+import onelib.make_cmd as _make_cmd
 import utils as _utils
 
 # TODO Find better way to suppress trackback on error
 sys.tracebacklimit = 0
 
 
+def get_driver_cfg_section():
+    return "one-import-tflite"
+
+
 def _get_parser():
     parser = argparse.ArgumentParser(
         description='command line tool to convert TensorFlow lite to circle')
@@ -77,7 +82,7 @@ def _convert(args):
     with open(logfile_path, 'wb') as f:
         # make a command to convert from tflite to circle
         tflite2circle_path = os.path.join(dir_path, 'tflite2circle')
-        tflite2circle_cmd = _utils._make_tflite2circle_cmd(tflite2circle_path,
+        tflite2circle_cmd = _make_cmd.make_tflite2circle_cmd(tflite2circle_path,
                                                            getattr(args, 'input_path'),
                                                            getattr(args, 'output_path'))
 
index a64abff1929b677ddab2628f2e0529276ec517d0..8b1f3f7be008fb13415e106f78f5cc68f725e203 100644 (file)
@@ -24,6 +24,8 @@ import os
 import subprocess
 import sys
 
+import onelib.constant as _constant
+import onelib.make_cmd as _make_cmd
 import utils as _utils
 
 # TODO Find better way to suppress trackback on error
@@ -60,7 +62,7 @@ def _get_parser():
         '-o', '--output_path', type=str, help='full filepath of the output file')
 
     # optimization pass
-    for opt in _utils._CONSTANT.OPTIMIZATION_OPTS:
+    for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
         # opt = (option_name, help_message)
         circle2circle_group.add_argument('--' + opt[0], action='store_true', help=opt[1])
 
@@ -99,7 +101,7 @@ def _optimize(args):
     with open(logfile_path, 'wb') as f:
         # make a command to optimize circle model
         circle2circle_path = os.path.join(dir_path, 'circle2circle')
-        circle2circle_cmd = _utils._make_circle2circle_cmd(args, circle2circle_path,
+        circle2circle_cmd = _make_cmd.make_circle2circle_cmd(args, circle2circle_path,
                                                            getattr(args, 'input_path'),
                                                            getattr(args, 'output_path'))
 
index 285191761d5b76155dfbded8a5df400c372e05e1..0f75166a7ecefbecfbee9e8afcf4bfa21740aa79 100644 (file)
@@ -26,16 +26,17 @@ VENV_PYTHON=${DRIVER_PATH}/venv/bin/python
 
 if [ ! -f ${VENV_ACTIVATE} ]; then
   # Create python virtual enviornment
-  python3 -m venv "${DRIVER_PATH}/venv"
+  python3.8 -m venv "${DRIVER_PATH}/venv"
 fi
 
 # NOTE version
 # - https://github.com/onnx/onnx/blob/master/docs/Versioning.md
 # - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
 
-VER_TENSORFLOW=2.3.0
-VER_ONNX=1.10.1
-VER_ONNX_TF=1.9.0
+VER_TENSORFLOW=2.8.0
+VER_ONNX=1.11.0
+VER_ONNXRUNTIME=1.11.0
+VER_ONNX_TF=1.10.0
 
 # Install tensorflow
 
@@ -54,18 +55,32 @@ if [[ ! -z "$ONE_PREPVENV_PIP_OPTION" ]]; then
   PIP_OPTIONS+=" ${ONE_PREPVENV_PIP_OPTION} "
 fi
 
-# TODO remove version number of 'pip==20.2.1 setuptools==49.3.0'
-# NOTE adding version is for temporary hotfix of setuptools 50.x.y version
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install -U pip==20.2.1 setuptools==49.3.0
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow==6.2.2
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install --upgrade pip setuptools
+if [ -n "${EXT_TENSORFLOW_WHL}" ]; then
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_TENSORFLOW_WHL}
+else
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow_probability
 
 # Install PyTorch and ONNX related
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
+# NOTE set ONE_PREPVENV_TORCH_STABLE to override 'torch_stable.html' URL.
+#      torch_stable.html points to download URL of torch wheel file(s)
+#      but sometimes the server gets unstable, especially from in-house CI.
+TORCH_STABLE_URL="https://download.pytorch.org/whl/torch_stable.html"
+if [[ ! -z "$ONE_PREPVENV_TORCH_STABLE" ]]; then
+  TORCH_STABLE_URL="${ONE_PREPVENV_TORCH_STABLE}"
+fi
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.11.0+cpu -f ${TORCH_STABLE_URL}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX}
+
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnxruntime==${VER_ONNXRUNTIME}
 
 # Provide install of custom onnx-tf
 if [ -n "${EXT_ONNX_TF_WHL}" ]; then
-  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} ${EXT_ONNX_TF_WHL}
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install ${EXT_ONNX_TF_WHL}
 else
-  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} onnx-tf==${VER_ONNX_TF}
+  ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx-tf==${VER_ONNX_TF}
 fi
index 22d4ddb0e9a216a40ee22107321fac73f1e5ba7c..f2eff24bd4fd6c4b58881b3a4055e009b98b51d6 100644 (file)
@@ -119,6 +119,18 @@ def _get_parser():
         help=
         "calibration algorithm for post-training quantization (supported: percentile/moving_average, default=percentile). 'percentile' mode uses the n-th percentiles as min/max values. 'moving_average' mode records the moving average of min/max."
     )
+    quantization_group.add_argument(
+        '--TF-style_maxpool',
+        action='store_true',
+        help=
+        "Force MaxPool Op to have the same input/output quantparams. NOTE: This option can degrade accuracy of some models.)"
+    )
+    quantization_group.add_argument(
+        '--quant_config',
+        type=str,
+        help=
+        "Path to the quantization configuration file."
+    )
 
     # arguments for force_quantparam option
     force_quantparam_group = parser.add_argument_group(
@@ -137,6 +149,19 @@ def _get_parser():
     force_quantparam_group.add_argument(
         '--zero_point', type=int, action='append', help='zero point (int)')
 
+    # arguments for copy_quantparam option
+    copy_quantparam_group = parser.add_argument_group(
+        'arguments for copy_quantparam option')
+
+    copy_quantparam_group.add_argument(
+        '--copy_quantparam',
+        action='store_true',
+        help='copy quantparam (scale, zero_point) of a tensor to another tensor.')
+    copy_quantparam_group.add_argument(
+        '--src_tensor_name', type=str, action='append', help='tensor name (string)')
+    copy_quantparam_group.add_argument(
+        '--dst_tensor_name', type=str, action='append', help='tensor name (string)')
+
     return parser
 
 
@@ -171,6 +196,11 @@ def _verify_arg(parser, args):
             missing.append('--scale')
         if not _utils._is_valid_attr(args, 'zero_point'):
             missing.append('--zero_point')
+    if _utils._is_valid_attr(args, 'copy_quantparam'):
+        if not _utils._is_valid_attr(args, 'src_tensor_name'):
+            missing.append('--src_tensor_name')
+        if not _utils._is_valid_attr(args, 'dst_tensor_name'):
+            missing.append('--dst_tensor_name')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
     if _utils._is_valid_attr(args, 'force_quantparam'):
@@ -180,6 +210,12 @@ def _verify_arg(parser, args):
         if len(tensors) != len(scales) or len(tensors) != len(zerops):
             parser.error(
                 'The same number of tensor_name, scale, and zero_point should be given.')
+    if _utils._is_valid_attr(args, 'copy_quantparam'):
+        src_tensors = getattr(args, 'src_tensor_name')
+        dst_tensors = getattr(args, 'dst_tensor_name')
+        if len(src_tensors) != len(dst_tensors):
+            parser.error(
+                'The same number of src_tensor_name and dst_tensor_name should be given.')
 
 
 def _parse_arg(parser):
@@ -197,6 +233,11 @@ def _quantize(args):
         _write_qparam(args)
         return
 
+    if _utils._is_valid_attr(args, 'copy_quantparam'):
+        # copy quantization parameters
+        _copy_qparam(args)
+        return
+
     # get file path to log
     dir_path = os.path.dirname(os.path.realpath(__file__))
     logfile_path = os.path.realpath(args.output_path) + '.log'
@@ -294,12 +335,19 @@ def _quantize(args):
             circle_quantizer_cmd.append(getattr(args, 'quantized_dtype'))
         if _utils._is_valid_attr(args, 'granularity'):
             circle_quantizer_cmd.append(getattr(args, 'granularity'))
+        if _utils._is_valid_attr(args, 'TF-style_maxpool'):
+            circle_quantizer_cmd.append('--TF-style_maxpool')
         if _utils._is_valid_attr(args, 'input_type'):
             circle_quantizer_cmd.append('--input_type')
             circle_quantizer_cmd.append(getattr(args, 'input_type'))
         if _utils._is_valid_attr(args, 'output_type'):
             circle_quantizer_cmd.append('--output_type')
             circle_quantizer_cmd.append(getattr(args, 'output_type'))
+        if _utils._is_valid_attr(args, 'quant_config'):
+            # NOTE --config conflicts with --config option in onecc, so
+            # we use quant_config for one-quantize
+            circle_quantizer_cmd.append('--config')
+            circle_quantizer_cmd.append(getattr(args, 'quant_config'))
         # input and output path
         circle_quantizer_cmd.append(tmp_output_path_2)
         if _utils._is_valid_attr(args, 'output_path'):
@@ -351,6 +399,40 @@ def _write_qparam(args):
         _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
 
+def _copy_qparam(args):
+    # get file path to log
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+    logfile_path = os.path.realpath(args.output_path) + '.log'
+
+    with open(logfile_path, 'wb') as f:
+        # get driver path
+        circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+        # make a command to write qparams to the tensors
+        circle_quantizer_cmd = [circle_quantizer_path]
+        # verbose
+        if _utils._is_valid_attr(args, 'verbose'):
+            circle_quantizer_cmd.append('--verbose')
+        if _utils._is_valid_attr(args, 'src_tensor_name'):
+            src_tensor_name = getattr(args, 'src_tensor_name')
+        if _utils._is_valid_attr(args, 'dst_tensor_name'):
+            dst_tensor_name = getattr(args, 'dst_tensor_name')
+        for (src, dst) in zip(src_tensor_name, dst_tensor_name):
+            circle_quantizer_cmd.append('--copy_quantparam')
+            circle_quantizer_cmd.append(src)
+            circle_quantizer_cmd.append(dst)
+        # input and output path
+        if _utils._is_valid_attr(args, 'input_path'):
+            circle_quantizer_cmd.append(getattr(args, 'input_path'))
+        if _utils._is_valid_attr(args, 'output_path'):
+            circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+        f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+        # run circle-quantizer
+        _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+
 def main():
     # parse arguments
     parser = _get_parser()
index ca440d852b913fc593032551118422c2e6b5a8f3..25682ff4b45e3db7e1536cfe28d020458584e8c4 100644 (file)
@@ -104,10 +104,6 @@ def _verify_arg(parser, args):
 
 def _get_driver_name(driver_name):
     return {
-        'one-import-bcq': 'one-import-bcq',
-        'one-import-tf': 'one-import-tf',
-        'one-import-tflite': 'one-import-tflite',
-        'one-import-onnx': 'one-import-onnx',
         'one-optimize': 'one-optimize',
         'one-quantize': 'one-quantize',
         'one-pack': 'one-pack',
@@ -130,19 +126,15 @@ def _is_available_driver(config, driver_name):
         'onecc', driver_name)
 
 
-def _verify_cfg(driver_list, config):
+def _verify_cfg(import_driver_list, config):
     if not config.has_section('onecc'):
         raise ImportError('[onecc] section is required in configuration file')
 
     import_driver_cnt = 0
-    if _is_available_driver(config, 'one-import-tf'):
-        import_driver_cnt += 1
-    if _is_available_driver(config, 'one-import-tflite'):
-        import_driver_cnt += 1
-    if _is_available_driver(config, 'one-import-bcq'):
-        import_driver_cnt += 1
-    if _is_available_driver(config, 'one-import-onnx'):
-        import_driver_cnt += 1
+    for d in import_driver_list:
+        if _is_available_driver(config, d):
+            import_driver_cnt += 1
+
     if import_driver_cnt > 1:
         raise AssertionError('Only one import-* driver can be executed')
 
@@ -170,22 +162,27 @@ def main():
     config = _parse_cfg(args)
 
     # verify configuration file
-    drivers = [
-        'one-import-tf', 'one-import-tflite', 'one-import-bcq', 'one-import-onnx',
+    bin_dir = os.path.dirname(os.path.realpath(__file__))
+    import_drivers_dict = _utils._detect_one_import_drivers(bin_dir)
+    transform_drivers = [
         'one-optimize', 'one-quantize', 'one-pack', 'one-codegen', 'one-profile'
     ]
-    _verify_cfg(drivers, config)
+    _verify_cfg(import_drivers_dict, config)
 
     # get sections to run
     section_to_run = []
-    for d in drivers:
+    for d in list(import_drivers_dict) + transform_drivers:
         if _is_available_driver(config, d):
             section_to_run.append(d)
 
     # run
     dir_path = os.path.dirname(os.path.realpath(__file__))
     for section in section_to_run:
-        driver_name = _get_driver_name(section)
+        if section in import_drivers_dict:
+            # we already has driver name in dict
+            driver_name = import_drivers_dict[section]
+        else:
+            driver_name = _get_driver_name(section)
         options = ['--config', getattr(args, 'config'), '--section', section]
         if _utils._is_valid_attr(args, 'verbose'):
             options.append('--verbose')
diff --git a/compiler/one-cmds/onelib/constant.py b/compiler/one-cmds/onelib/constant.py
new file mode 100644 (file)
index 0000000..7ddd738
--- /dev/null
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
class CONSTANT:
    """Immutable table of option definitions shared by the one-cmds drivers."""
    __slots__ = ()  # This prevents access via __dict__.
    # (OPTION_NAME, HELP_MESSAGE) pairs; drivers register each one as a
    # store_true command-line flag named '--<OPTION_NAME>'.
    OPTIMIZATION_OPTS = (
        # (OPTION_NAME, HELP_MESSAGE)
        ('O1', 'enable O1 optimization pass'),
        ('convert_nchw_to_nhwc',
         'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
         ),
        ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
        ('nchw_to_nhwc_input_shape',
         'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
        ('nchw_to_nhwc_output_shape',
         'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
        ('fold_add_v2', 'fold AddV2 op with constant inputs'),
        ('fold_cast', 'fold Cast op with constant input'),
        ('fold_dequantize', 'fold Dequantize op'),
        ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
        ('fold_gather', 'fold Gather op'),
        ('fold_sparse_to_dense', 'fold SparseToDense op'),
        ('forward_reshape_to_unaryop', 'Forward Reshape op'),
        ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
        ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
        ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
        ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
        ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
        ('fuse_bcq', 'apply Binary Coded Quantization'),
        ('fuse_preactivation_batchnorm',
         'fuse BatchNorm operators of pre-activations to Convolution op'),
        ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'),
        ('fuse_transpose_with_mean',
         'fuse Mean with a preceding Transpose under certain conditions'),
        ('make_batchnorm_gamma_positive',
         'make negative gamma of BatchNorm to a small positive value (1e-10).'
         ' Note that this pass can change the execution result of the model.'
         ' So, use it only when the impact is known to be acceptable.'),
        ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
        ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
        ('replace_cw_mul_add_with_depthwise_conv',
         'replace channel-wise Mul/Add with DepthwiseConv2D'),
        ('remove_fakequant', 'remove FakeQuant ops'),
        ('remove_quantdequant', 'remove Quantize-Dequantize sequence'),
        ('remove_redundant_quantize', 'remove redundant Quantize ops'),
        ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
        ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
        ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
        ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
        ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
        ('remove_unnecessary_split', 'remove unnecessary split ops'),
        ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
        ('resolve_customop_batchmatmul',
         'convert Custom(BatchMatmul) op to BatchMatmul op'),
        ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
        ('resolve_customop_max_pool_with_argmax',
         'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
        ('shuffle_weight_to_16x1float32',
         'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
         ' Note that it only converts weights whose row is a multiple of 16'),
        ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
        ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'),
        ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'),
        ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
        ('substitute_strided_slice_to_reshape',
         'convert certain condition StridedSlice to Reshape'),
        ('substitute_transpose_to_reshape',
         'convert certain condition Transpose to Reshape'),
        ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
        ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'))


# Shadow the class with a singleton instance; with empty __slots__ the
# instance is effectively immutable, so `CONSTANT.OPTIMIZATION_OPTS` is
# the module's read-only API.
CONSTANT = CONSTANT()
diff --git a/compiler/one-cmds/onelib/make_cmd.py b/compiler/one-cmds/onelib/make_cmd.py
new file mode 100644 (file)
index 0000000..d8380f2
--- /dev/null
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+
+import onelib.constant as _constant
+
+def _is_valid_attr(args, attr):
+    return hasattr(args, attr) and getattr(args, attr)
+
+
def make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
    """Build the argv list for invoking tf2tfliteV2.py.

    Flags are emitted in a fixed order: verbose, model format, converter
    version, then paths and array/shape options. When neither the explicit
    "*_cmd" string nor the named variant is present on *args*, the defaults
    --graph_def and --v1 are used.
    """
    cmd = [sys.executable, os.path.expanduser(driver_path)]

    if _is_valid_attr(args, 'verbose'):
        cmd.append('--verbose')

    # model format: an explicit pre-built flag wins over the named format
    if _is_valid_attr(args, 'model_format_cmd'):
        cmd.append(getattr(args, 'model_format_cmd'))
    elif _is_valid_attr(args, 'model_format'):
        cmd.append('--' + getattr(args, 'model_format'))
    else:
        cmd.append('--graph_def')  # default value

    # converter version: same precedence scheme as the model format
    if _is_valid_attr(args, 'converter_version_cmd'):
        cmd.append(getattr(args, 'converter_version_cmd'))
    elif _is_valid_attr(args, 'converter_version'):
        cmd.append('--' + getattr(args, 'converter_version'))
    else:
        cmd.append('--v1')  # default value

    # Paths: presence is checked on *args*, but the actual values come from
    # the function parameters (callers may pass resolved/temporary paths).
    if _is_valid_attr(args, 'input_path'):
        cmd.extend(['--input_path', os.path.expanduser(input_path)])
    if _is_valid_attr(args, 'output_path'):
        cmd.extend(['--output_path', os.path.expanduser(output_path)])

    # array / shape options are forwarded verbatim from args
    for option in ('input_arrays', 'input_shapes', 'output_arrays'):
        if _is_valid_attr(args, option):
            cmd.extend(['--' + option, getattr(args, option)])

    return cmd
+
+
def make_tflite2circle_cmd(driver_path, input_path, output_path):
    """Build the argv list for invoking tflite2circle.

    All three components are user-supplied paths, so each gets
    tilde-expansion before being handed to the subprocess layer.
    """
    return list(map(os.path.expanduser, (driver_path, input_path, output_path)))
+
+
def make_circle2circle_cmd(args, driver_path, input_path, output_path):
    """Build the argv list for invoking circle2circle.

    Args:
        args: parsed arguments (argparse.Namespace-like) holding option flags
        driver_path (str): path to the circle2circle executable
        input_path (str): path to the input circle model
        output_path (str): path where the optimized model is written

    Returns:
        list of str: command line ready for subprocess execution
    """
    cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
    # profiling
    if _is_valid_attr(args, 'generate_profile_data'):
        cmd.append('--generate_profile_data')
    # optimization passes (only zero-argument true/false options)
    # TODO support options whose number of arguments is more than zero
    for opt in _constant.CONSTANT.OPTIMIZATION_OPTS:
        name = opt[0]
        if not _is_valid_attr(args, name):
            continue
        value = getattr(args, name)
        # Option values arrive through two interfaces:
        #  - command line: argparse stores a plain bool (store_true), which is
        #    only truthy (and thus only reaches here) when the flag was given;
        #  - config file: the value is the raw string from the file. Users may
        #    leave "SomeOption=False" in the file during development instead of
        #    deleting the line, so falsy spellings must NOT enable the pass.
        # (The original code carried this explanation as a floating
        # triple-quoted string, i.e. a live no-op expression statement.)
        if isinstance(value, bool):
            cmd.append('--' + name)
        elif isinstance(value, str) and value.lower() not in ('false', '0', 'n'):
            cmd.append('--' + name)

    return cmd
diff --git a/compiler/one-cmds/onnx_legalizer.py b/compiler/one-cmds/onnx_legalizer.py
new file mode 100755 (executable)
index 0000000..26c2b75
--- /dev/null
@@ -0,0 +1,1065 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import onnx.numpy_helper
+import sys
+import numpy as np
+import re
+
+# Transform onnx model to make it compilable with our toolchain
+#
+# This code works with onnx model in proto format. See proto buffers format in
+# https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3
+#
+# More examples of handling onnx models could be found here:
+# https://github.com/onnx/onnx/tree/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/examples
+#
+# List of transformations:
+# - Replace RNN operation with unrolled subgraph
+# - Replace LSTM operation with unrolled subgraph
+
+
class LegalizeOptions:
    """Controls transformations that legalizer apply

    Attributes:
        unroll_rnn (bool): default is False. If True - unrolls RNN operations
        unroll_lstm (bool): default is False. If True - unrolls LSTM operations
    """

    # Class-level defaults; callers toggle these (per instance or on the
    # class itself) before handing the options object to the legalizer.
    unroll_rnn: bool = False
    unroll_lstm: bool = False
+
+
+def _reverse_str(s):
+    return ''.join(reversed(s))
+
+
+def _parse_tensor_name(name):
+    """Splits tensor name to base part and serial number
+
+    Most of tensor names have following format: "tensor_123".
+    This  function breaks name into two values: "tensor_" and 123.
+    Tensor names like this: "321" are broken into "" and 321.
+
+    Serial number is used to create unique tensor names using given base name.
+
+    Args:
+        name (str): tensor name
+
+    Returns:
+        tuple of str, int: base name and serial number of tensor
+    """
+    rev = _reverse_str(name)
+    m = re.match('(\d*)(.*)', rev)
+    if m.groups()[0] != '':
+        return (_reverse_str(m.groups()[1]), int(_reverse_str(m.groups()[0])))
+    else:
+        return (_reverse_str(m.groups()[1]), 0)
+
+
class _ModelTransformerHelper:
    """Helper for onnx model transformation

    This helper is used for convenient operation replacement in onnx model

    Attributes:
        _model (onnx.onnx_ml_pb2.ModelProto): target model that should be changed
        _nodes_to_delete (list of onnx.onnx_ml_pb2.NodeProto): list of replaced operations
        _insert_id (int): position to insert created operations (should be in topologically sorted)
        _base_name_idx (dict from str to int): maps tensor "base" name to
            largest existing serial num. For example model has tensors "t_1", "t_2", "t_4",
            in that case _base_name_idx["t_"] == 4.
            This attribute is used for unique tensor name generation.
    """

    def __init__(self, model):
        """Scan *model* and record the highest serial number per tensor base name."""
        self._model = model
        self._nodes_to_delete = []
        self._insert_id = 0
        # each tensor has name containing base name and unique number. for example:
        # "abc_123": "abc_" - base name, "123" - unique number
        # if no number in name, consider it is equal to "0"

        # mapping from base names to largest given number
        self._base_name_idx = {}
        # gather name information for existing tensors
        for node in model.graph.node:
            for t in list(node.input) + list(node.output):
                base_name, number = _parse_tensor_name(t)
                if base_name in self._base_name_idx:
                    self._base_name_idx[base_name] = max(self._base_name_idx[base_name],
                                                         number)
                else:
                    self._base_name_idx[base_name] = number

    def make_tensor_with_base_name(self, base_name):
        """ Create unique name for given base_name

        Args:
            base_name (str): base tensor name

        Returns:
            str : unique tensor name that starts with base_name
        """
        if base_name in self._base_name_idx:
            self._base_name_idx[base_name] += 1
            return base_name + str(self._base_name_idx[base_name])
        else:
            self._base_name_idx[base_name] = 0
            return base_name + '0'

    def make_node(self, opcode, inputs, outputs, *p_args, **k_args):
        """Create arbitrary node and insert it in graph.

        Nodes are inserted at _insert_id, which advances after every call, so
        successive calls keep the graph's node list topologically ordered.

        Args:
            opcode (str): opcode name of desired operation
            inputs (list of str): names of input tensors
            outputs (list of str or int): names of existing tensors to use as output tensors for operation or
                number of tensors that should be created
            p_args: additional arguments for onnx make_node helper
            k_args: attributes for onnx node

        Returns:
            list of str: list of output tensor names
        """
        if type(outputs) == int:
            outputs = [self.make_tensor_with_base_name('') for i in range(outputs)]
        assert (type(outputs) == list)
        node = onnx.helper.make_node(opcode, inputs, outputs, *p_args, **k_args)
        self._model.graph.node.insert(self._insert_id, node)
        self._insert_id += 1
        return outputs

    def make_split(self, input, split_sizes, axis):
        """Create Split operation and insert it in graph.

        Args:
            input (str): name of input tensor
            split_sizes (list of int): list of split sizes
            axis (int): number of axis to split

        Returns:
            list: list of output tensor names
        """
        return self.make_node(
            'Split', [input], len(split_sizes), axis=axis, split=split_sizes)

    def make_concat(self, inputs, axis):
        """Create Concat operation and insert it in graph.

        Args:
            inputs (list of str): list of tensors names to concat
            axis (int): axis number to concat

        Returns:
            str: output tensor name
        """
        return self.make_node('Concat', inputs, 1, axis=axis)[0]

    def make_squeeze(self, input, axes):
        """Create Squeeze operation and insert it in graph.

        Args:
            input (str): name of input tensor
            axes (list of int): list of dimension containing ones to remove

        Returns:
            str: output tensor name
        """
        return self.make_node('Squeeze', [input], 1, axes=axes)[0]

    def make_unsqueeze(self, input, axes):
        """Create Unsqueeze operation and insert it in graph.

        Args:
            input (str): name of input tensor
            axes (list of int): list of dimension to insert ones

        Returns:
            str: output tensor name
        """
        return self.make_node('Unsqueeze', [input], 1, axes=axes)[0]

    def make_gemm(self, A, B, C, trans_a=False, trans_b=False):
        """Create Gemm operation and insert it in graph.

        Result tensor contains A*B + C

        Args:
            A (str): name of tensor A
            B (str): name of tensor B
            C (str): name of tensor C
            trans_a (bool): if True, transpose tensor A before multiplication
            trans_b (bool): if True, transpose tensor B before multiplication

        Returns:
            str: output tensor name
        """
        return self.make_node(
            'Gemm', [A, B, C], 1, transA=bool(trans_a), transB=bool(trans_b))[0]

    def make_add(self, a, b):
        """Creates Add operation and insert it in graph.

        Args:
            a (str): name of left operand tensor
            b (str): name of right operand tensor

        Returns:
            str: output tensor name
        """
        return self.make_node('Add', [a, b], 1)[0]

    def make_mul(self, a, b):
        """Creates Mul operation and insert it in graph.

        Args:
            a (str): name of left operand tensor
            b (str): name of right operand tensor

        Returns:
            str: output tensor name
        """
        return self.make_node('Mul', [a, b], 1)[0]

    def make_clip(self, input, min, max):
        """Create Clip operation and insert it in graph.

        NOTE: the `min`/`max` parameter names shadow builtins but are part of
        the keyword interface used by callers, so they must stay as-is.

        Args:
            input (str): input tensor name
            min (int/float): lower clip bound
            max (int/float): upper clip bound

        Returns:
            str: output tensor name
        """
        return self.make_node('Clip', [input], 1, min=min, max=max)[0]

    def make_act(self, input, act_name):
        """Create activation function operation and insert it in graph.

        Args:
            input (str): input tensor name
            act_name (str): name of activation function, one of ['Relu', 'Tanh', 'Sigmoid']

        Returns:
            str: output tensor name
        """
        assert (act_name in ['Relu', 'Tanh', 'Sigmoid'])
        return self.make_node(act_name, [input], 1)[0]

    def make_constant_tensor(self, tensor_data, base_name):
        """Creates onnx constant tensor

        Args:
            tensor_data (numpy.ndarray): tensor data
            base_name (str): prefix of constant tensor name

        Returns:
            str: name of created constant tensor
        """
        tensor = onnx.numpy_helper.from_array(tensor_data)
        tensor.name = self.make_tensor_with_base_name(base_name)
        self._model.graph.initializer.append(tensor)
        return tensor.name

    def mark_for_deletion(self, node):
        """Queue *node* for removal; actual removal happens in delete_marked_nodes."""
        self._nodes_to_delete += [node]

    def get_insert_id(self):
        """Return the current node-insertion position."""
        return self._insert_id

    def set_insert_id(self, insert_id):
        """Set the node-insertion position (must keep topological order)."""
        self._insert_id = insert_id

    def delete_marked_nodes(self):
        """Remove all nodes previously queued via mark_for_deletion."""
        for node in self._nodes_to_delete:
            self._model.graph.node.remove(node)
+
+
+class _TensorInfo:
+    def __init__(self, dtype, shape):
+        self.dtype = dtype
+        self.shape = shape
+
+
def _get_tensor_infos(model):
    """Infer tensor shapes and dtypes

    Runs onnx shape inference and merges the inferred value_info/input
    entries with the graph's initializers (which carry their own dtype and
    dims and are not part of value_info).

    Args:
        model (onnx.onnx_ml_pb2.ModelProto): model to process

    Returns:
        dict from str to _TensorInfo: maps tensor name to shape and dtype information
    """
    inferred = onnx.shape_inference.infer_shapes(model)

    infos = {}
    graph_tensors = list(inferred.graph.value_info) + list(inferred.graph.input)
    for tensor in graph_tensors:
        tensor_type = tensor.type.tensor_type
        shape = [dim.dim_value for dim in tensor_type.shape.dim]
        infos[tensor.name] = _TensorInfo(tensor_type.elem_type, shape)

    for initializer in model.graph.initializer:
        infos[initializer.name] = _TensorInfo(initializer.data_type, initializer.dims)
    return infos
+
+
+def _dtype_to_np(dtype):
+    """Convert onnx dtype value to numpy dtype class
+
+    For more types see:
+    https://github.com/onnx/onnx/blob/96516aecd4c110b0ac57eba08ac236ebf7205728/onnx/onnx.proto3#L484
+
+    Args:
+        dtype (int): onnx dtype
+
+    Returns:
+        numpy data type: numpy dtype, like np.float32
+    """
+
+    if dtype == 1:
+        return np.float32
+    else:
+        raise NotImplementedError('unsupported data type')
+
+
def _generate_one_direction_RNN(transformer, X, W, R, B, initial_h, clip, activation_name):
    """Generate subgraph of one direction of unrolled RNN layer

    Args:
        transformer (_ModelTransformerHelper): helper for model generation
        X (list of str): names of input tensors in sequence. Tensor shapes: [batch_size, input_size].
        W (str): name of weight tensor
        R (str): name of recurrence weight tensor
        B (str): name of bias tensor
        initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
        clip (float or None): range which clips input of activations
        activation_name (str): activation function

    Returns:
        list of str: names of the hidden-state tensors, one per time step
    """
    # one direction RNN:
    #
    # For details see:
    # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#RNN-7
    #
    # H = f(X*(W^T) + h*(R^T) + B)
    #
    # H  - new hidden state
    # h  - previous hidden state
    # X  - current input
    # W  - input weights matrix
    # R  - recurrent weights matrix
    # B  - bias
    # f  - activation function

    seq_length = len(X)
    first_iter = 0
    state_tensors = []
    if initial_h is not None:
        previous_state_tensor = initial_h
    else:
        # No initial state given: the first step has no recurrent term, so
        # compute it separately and start the main loop from step 1.
        first_iter = 1
        state_tensor = transformer.make_gemm(X[0], W, B, trans_b=True)
        if clip is not None:
            state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
        previous_state_tensor = transformer.make_act(state_tensor, activation_name)
        state_tensors.append(previous_state_tensor)

    for i in range(first_iter, seq_length):
        # H_i = f(clip(X_i*W^T + B + h*R^T))
        state_tensor = transformer.make_gemm(X[i], W, B, trans_b=True)
        state_tensor = transformer.make_gemm(
            previous_state_tensor, R, state_tensor, trans_b=True)
        if clip is not None:
            state_tensor = transformer.make_clip(state_tensor, min=-clip, max=clip)
        previous_state_tensor = transformer.make_act(state_tensor, activation_name)
        state_tensors.append(previous_state_tensor)
    return state_tensors
+
+
def _transform_unidirectional_RNN(transformer, original_node, x, tensor_infos, activation,
                                 clip, direction, hidden_size, layout):
    """Generate Simple (forward or reverse) unrolled RNN

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional RNN operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to its shape and dtype info
        activation (str): name of activation function
        clip (float or None): range which clips input of activations
        direction (str): "forward" or "reverse"
        hidden_size (int): size of hidden state
        layout (int): See attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
    """

    inputs = original_node.input
    outputs = original_node.output
    # reverse direction simply processes the time steps back-to-front
    if direction == 'reverse':
        x.reverse()
    # drop the leading num_directions (==1) axis from W and R
    w = transformer.make_squeeze(inputs[1], axes=[0])
    r = transformer.make_squeeze(inputs[2], axes=[0])
    if len(inputs) > 3 and inputs[3] != '':
        # onnx packs Wb and Rb into one tensor; fold them into a single bias
        raw_bias_tensor = transformer.make_squeeze(inputs[3], axes=[0])
        splitted_bias_tensors = transformer.make_split(
            raw_bias_tensor, split_sizes=[hidden_size] * 2, axis=0)
        b = transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
    else:
        # bias is optional: synthesize a zero bias with R's dtype
        data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
        b = transformer.make_constant_tensor(
            np.zeros(hidden_size, dtype=data_type), "zero_bias")
    if len(inputs) > 5 and inputs[5] != '':
        # strip the directions axis from the optional initial hidden state
        direction_dim = layout
        initial_h = transformer.make_squeeze(inputs[5], axes=[direction_dim])
    else:
        initial_h = None
    state_tensors = _generate_one_direction_RNN(transformer, x, w, r, b, initial_h, clip,
                                               activation)
    # axis positions of num_directions in the Y and Y_h outputs depend on layout
    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    # re-add the seq_length and num_directions axes to each per-step state
    for state in state_tensors:
        state_layout_tensors += [
            transformer.make_unsqueeze(state, axes=[seq_length_dim, y_direction_dim])
        ]

    # use low-level interface to attach to existing tensors
    # NOTE(review): assumes both outputs Y and Y_h are present on the node -
    # confirm callers never pass an RNN with omitted outputs
    Y_h = outputs[1]
    transformer.make_node(
        'Unsqueeze', [state_tensors[-1]], [Y_h], axes=[y_h_direction_dim])
    Y = outputs[0]
    transformer.make_node(
        'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
def _transform_bidirectional_RNN(transformer, original_node, x, tensor_infos, activations,
                                clip, hidden_size, layout):
    """Generate Bidirectional unrolled RNN

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional RNN operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to its shape and dtype info
        activations (list of str): list of len (2) containing names of forward and reverse activations
        clip (float or None): range which clips input of activations
        hidden_size (int): size of hidden state
        layout (int): See attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-56
    """

    inputs = original_node.input
    outputs = original_node.output
    # split W and R along the num_directions axis: index 0 = forward, 1 = backward
    w_bi = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
    r_bi = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
    w = []
    r = []
    for d in range(2):
        w += [transformer.make_squeeze(w_bi[d], axes=[0])]
        r += [transformer.make_squeeze(r_bi[d], axes=[0])]

    b = []
    if len(inputs) > 3 and inputs[3] != '':
        # per direction: onnx packs Wb and Rb together; fold them into one bias
        raw_bias_tensors = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
        for d in range(2):
            raw_bias_tensors_squeezed = transformer.make_squeeze(
                raw_bias_tensors[d], axes=[0])
            splitted_bias_tensors = transformer.make_split(
                raw_bias_tensors_squeezed, split_sizes=[hidden_size] * 2, axis=0)
            b += [
                transformer.make_add(splitted_bias_tensors[0], splitted_bias_tensors[1])
            ]
    else:
        # bias is optional: share one zero-bias constant for both directions
        data_type = _dtype_to_np(tensor_infos[inputs[2]].dtype)
        b = [
            transformer.make_constant_tensor(
                np.zeros(hidden_size, dtype=data_type), "zero_bias")
        ] * 2
    initial_h = [None, None]
    if len(inputs) > 5 and inputs[5] != '':
        # split the optional initial state per direction and drop that axis
        direction_dim = layout
        initial_h = transformer.make_split(
            inputs[5], split_sizes=[1, 1], axis=direction_dim)
        for d in range(2):
            initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])

    # forward pass over x, then backward pass over reversed x; the backward
    # states are reversed again so both lists are indexed by original time step
    state_f_tensors = _generate_one_direction_RNN(transformer, x, w[0], r[0], b[0],
                                                 initial_h[0], clip, activations[0])
    x.reverse()
    state_b_tensors = _generate_one_direction_RNN(transformer, x, w[1], r[1], b[1],
                                                 initial_h[1], clip, activations[1])
    state_b_tensors.reverse()

    # axis positions of num_directions in the Y and Y_h outputs depend on layout
    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    seq_length = len(x)
    # per time step: re-add seq/direction axes and concat fwd+bwd states
    for t in range(seq_length):
        state_f = state_f_tensors[t]
        state_b = state_b_tensors[t]
        state_layout_tensors_f = transformer.make_unsqueeze(
            state_f, axes=[seq_length_dim, y_direction_dim])
        state_layout_tensors_b = transformer.make_unsqueeze(
            state_b, axes=[seq_length_dim, y_direction_dim])
        state_layout_tensors += [
            transformer.make_concat(
                [state_layout_tensors_f, state_layout_tensors_b], axis=y_direction_dim)
        ]

    # Y_h takes the LAST forward state but the FIRST backward state (which
    # corresponds to the last step the backward direction processed)
    last_f_state_layout_tensor = transformer.make_unsqueeze(
        state_f_tensors[-1], axes=[y_h_direction_dim])
    last_b_state_layout_tensor = transformer.make_unsqueeze(
        state_b_tensors[0], axes=[y_h_direction_dim])

    # use low-level interface to attach to existing tensors
    # NOTE(review): assumes both outputs Y and Y_h are present on the node -
    # confirm callers never pass an RNN with omitted outputs
    Y_h = outputs[1]
    transformer.make_node(
        'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
        axis=y_h_direction_dim)

    Y = outputs[0]
    transformer.make_node(
        'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
def _legalize_RNN(transformer, tensor_infos, node):
    """Unroll RNN operation

    Replaces the RNN node with an equivalent subgraph of basic operations
    and marks the original node for deletion.

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to its shape and dtype info
        node (onnx.onnx_ml_pb2.NodeProto): RNN operation to unroll

    Raises:
        NotImplementedError: for RNN features that cannot be unrolled
        RuntimeError: if the direction attribute has an unknown value
    """
    inputs = node.input
    if len(inputs) > 4 and inputs[4] != '':
        raise NotImplementedError('Variadic length of output is not supported')
    # attribute defaults per the ONNX RNN specification
    activation_alpha = []
    activation_beta = []
    activations = ['Tanh', 'Tanh']
    clip = None
    direction = 'forward'
    hidden_size = 0
    layout = 0

    for attr in node.attribute:
        if attr.name == 'activation_alpha':
            activation_alpha = attr.floats
        if attr.name == 'activation_beta':
            activation_beta = attr.floats
        if attr.name == 'activations':
            activations = list(map(lambda item: item.decode('UTF-8'), list(attr.strings)))
        if attr.name == 'clip':
            clip = attr.f
        if attr.name == 'direction':
            direction = attr.s.decode('UTF-8')
        if attr.name == 'hidden_size':
            hidden_size = attr.i
        if attr.name == 'layout':
            layout = attr.i

    if len(activation_alpha) > 0 or len(activation_beta) > 0:
        # message fixed: this is the RNN path (was copy-pasted as "LSTM")
        raise NotImplementedError('Unsupported parameters for RNN activations')

    for act in activations:
        if act not in ['Relu', 'Tanh', 'Sigmoid']:
            raise NotImplementedError('Unsupported activation function')

    seq_length_dim = layout
    seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
    if hidden_size == 0:
        # fall back to the recurrence weight shape [num_dirs, hidden, hidden]
        hidden_size = tensor_infos[inputs[2]].shape[2]

    # split the input sequence along the time axis into per-step 2D tensors
    input_split_tensor = transformer.make_split(
        inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
    x = []
    for frame_tensor in input_split_tensor:
        x.append(transformer.make_squeeze(frame_tensor, axes=[0]))

    if direction in ['forward', 'reverse']:
        _transform_unidirectional_RNN(transformer, node, x, tensor_infos, activations[0],
                                     clip, direction, hidden_size, layout)
    elif direction == 'bidirectional':
        _transform_bidirectional_RNN(transformer, node, x, tensor_infos, activations, clip,
                                    hidden_size, layout)
    else:
        raise RuntimeError('Unknown RNN type')

    transformer.mark_for_deletion(node)
+
+
+def _generate_one_direction_LSTM(transformer, X, W, R, B, initial_h, initial_c, P, clip,
+                                act, dtype, hidden_size, batch_size):
+    """Generate subgraph for one direction of unrolled LSTM layer
+
+    Args:
+        transformer (_ModelTransformerHelper): helper for model generation
+        X (list of str): names of tensors in input sequence. Each tensor shape: [batch_size, input_size]
+        W (str): name of concatenated weight tensor: [input, output, forget, cell]
+        R (str): name of concatenated recurrence weights tensor: [input, output, forget, cell]
+        B (str): name of concatenated bias tensor: [input, output, forget, cell]
+        initial_h (str or None): name of tensor containing initial hidden state. Shape [batch_size, hidden_size]
+        initial_c (str or None): name of tensor containing initial cell state. Shape [batch_size, hidden_size]
+        P (str or None): name of concatenated peephole tensor: [input, output, forget]
+        clip (float or None): range which clips input of activations
+        act (dict of str):  activation functions {'f': 'Sigmoid', 'g': 'Tanh', 'h': 'Tanh'}
+        dtype (numpy dtype): data type used in created LSTM operation
+        hidden_size (int): hidden dimension
+        batch_size (int): batch dimension
+    """
+    # one direction LSTM:
+    #
+    # For details see:
+    # https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Changelog.md#LSTM-7
+    #
+    # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
+    # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
+    # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
+    # Ct = ft (.) Ct-1 + it (.) ct
+    # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
+    # Ht = ot (.) h(Ct)
+    #
+    # X - input tensor
+    # i - input gate
+    # o - output gate
+    # f - forget gate
+    # c - cell gate
+    # t - time step (t-1 means previous time step)
+    # W[iofc] - W parameter weight matrix for input, output, forget, and cell gates
+    # R[iofc] - R recurrence weight matrix for input, output, forget, and cell gates
+    # Wb[iofc] - W bias vectors for input, output, forget, and cell gates
+    # Rb[iofc] - R bias vectors for input, output, forget, and cell gates
+    # P[iof] - P peephole weight vector for input, output, and forget gates
+    # WB[iofc] - W parameter weight matrix for backward input, output, forget, and cell gates
+    # RB[iofc] - R recurrence weight matrix for backward input, output, forget, and cell gates
+    # WBb[iofc] - W bias vectors for backward input, output, forget, and cell gates
+    # RBb[iofc] - R bias vectors for backward input, output, forget, and cell gates
+    # PB[iof] - P peephole weight vector for backward input, output, and forget gates
+    # H - Hidden state
+
+    seq_length = len(X)
+    state_h_tensors = []
+
+    w_tensors = transformer.make_split(W, split_sizes=[hidden_size] * 4, axis=0)
+    W = {'i': w_tensors[0], 'o': w_tensors[1], 'f': w_tensors[2], 'c': w_tensors[3]}
+
+    r_tensors = transformer.make_split(R, split_sizes=[hidden_size] * 4, axis=0)
+    R = {'i': r_tensors[0], 'o': r_tensors[1], 'f': r_tensors[2], 'c': r_tensors[3]}
+
+    if B is not None:
+        separate_b_tensors = transformer.make_split(
+            B, split_sizes=[hidden_size] * 8, axis=0)
+        b_tensors = []
+        for i in range(4):
+            b_tensors += [
+                transformer.make_add(separate_b_tensors[i], separate_b_tensors[i + 4])
+            ]
+    else:
+        b_tensors = [
+            transformer.make_constant_tensor(
+                np.zeros((hidden_size), dtype=dtype), 'zero_b')
+        ] * 4
+    B = {'i': b_tensors[0], 'o': b_tensors[1], 'f': b_tensors[2], 'c': b_tensors[3]}
+
+    if initial_h is not None:
+        previous_h_state_tensor = initial_h
+    else:
+        previous_h_state_tensor = transformer.make_constant_tensor(
+            np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_h')
+
+    if initial_c is not None:
+        previous_c_state_tensor = initial_c
+    else:
+        previous_c_state_tensor = transformer.make_constant_tensor(
+            np.zeros((batch_size, hidden_size), dtype=dtype), 'initial_c')
+
+    if P is not None:
+        p_tensors = transformer.make_split(P, split_sizes=[hidden_size] * 3, axis=0)
+        P = {'i': p_tensors[0], 'o': p_tensors[1], 'f': p_tensors[2]}
+    else:
+        zero = transformer.make_constant_tensor(
+            np.zeros((hidden_size), dtype=dtype), 'zero_peephole')
+        P = {'i': zero, 'o': zero, 'f': zero}
+
+    for i in range(seq_length):
+        # it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
+        it = transformer.make_gemm(X[i], W['i'], B['i'], trans_b=True)
+        it = transformer.make_gemm(previous_h_state_tensor, R['i'], it, trans_b=True)
+        peephole_it = transformer.make_mul(P['i'], previous_c_state_tensor)
+        it = transformer.make_add(it, peephole_it)
+        if clip is not None:
+            it = transformer.make_clip(it, min=-clip, max=clip)
+        it = transformer.make_act(it, act['f'])
+
+        # ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
+        ft = transformer.make_gemm(X[i], W['f'], B['f'], trans_b=True)
+        ft = transformer.make_gemm(previous_h_state_tensor, R['f'], ft, trans_b=True)
+        peephole_ft = transformer.make_mul(P['f'], previous_c_state_tensor)
+        ft = transformer.make_add(ft, peephole_ft)
+        if clip is not None:
+            ft = transformer.make_clip(ft, min=-clip, max=clip)
+        ft = transformer.make_act(ft, act['f'])
+
+        # ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
+        ct = transformer.make_gemm(X[i], W['c'], B['c'], trans_b=True)
+        ct = transformer.make_gemm(previous_h_state_tensor, R['c'], ct, trans_b=True)
+        if clip is not None:
+            ct = transformer.make_clip(ct, min=-clip, max=clip)
+        ct = transformer.make_act(ct, act['g'])
+
+        # Ct = ft (.) Ct-1 + it (.) ct
+        ft_Ct = transformer.make_mul(ft, previous_c_state_tensor)
+        it_ct = transformer.make_mul(it, ct)
+        Ct = transformer.make_add(ft_Ct, it_ct)
+        previous_c_state_tensor = Ct
+
+        # ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
+        ot = transformer.make_gemm(X[i], W['o'], B['o'], trans_b=True)
+        ot = transformer.make_gemm(previous_h_state_tensor, R['o'], ot, trans_b=True)
+        peephole_ot = transformer.make_mul(P['o'], Ct)
+        ot = transformer.make_add(ot, peephole_ot)
+        if clip is not None:
+            ot = transformer.make_clip(ot, min=-clip, max=clip)
+        ot = transformer.make_act(ot, act['f'])
+
+        # Ht = ot (.) h(Ct)
+        Ht = transformer.make_act(Ct, act['h'])
+        Ht = transformer.make_mul(ot, Ht)
+        previous_h_state_tensor = Ht
+        state_h_tensors += [Ht]
+
+    return (state_h_tensors, previous_c_state_tensor)
+
+
def _transform_unidirectional_LSTM(transformer, original_node, x, tensor_infos,
                                  activations, clip, direction, hidden_size, layout):
    """Unroll a single-direction (forward or reverse) LSTM operation.

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): unidirectional LSTM operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to its shape and dtype info
        activations (list of str): list of length 3 containing names of activation functions
        clip (float or None): range which clips input of activations
        direction (str): "forward" or "reverse"
        hidden_size (int): size of hidden state
        layout (int): See attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
    """

    ins = original_node.input
    outs = original_node.output

    # A reverse LSTM is a forward LSTM fed with time-reversed input.
    # Note: this mutates the caller's list; Y is re-reversed below.
    if direction == 'reverse':
        x.reverse()

    def squeeze_optional(idx, axes):
        # ONNX encodes an absent optional input as an empty tensor name
        if len(ins) > idx and ins[idx] != '':
            return transformer.make_squeeze(ins[idx], axes=axes)
        return None

    # drop the leading direction dimension (size 1) from weights and states
    w = transformer.make_squeeze(ins[1], axes=[0])
    r = transformer.make_squeeze(ins[2], axes=[0])
    b = squeeze_optional(3, [0])
    direction_dim = layout
    initial_h = squeeze_optional(5, [direction_dim])
    initial_c = squeeze_optional(6, [direction_dim])
    p = squeeze_optional(7, [0])

    dtype = _dtype_to_np(tensor_infos[ins[0]].dtype)
    batch_size = tensor_infos[ins[0]].shape[1 - layout]

    act = {'f': activations[0], 'g': activations[1], 'h': activations[2]}

    state_h_tensors, state_c_tensor = _generate_one_direction_LSTM(
        transformer, x, w, r, b, initial_h, initial_c, p, clip, act, dtype, hidden_size,
        batch_size)

    seq_length_dim = layout
    y_direction_dim = layout + 1
    y_h_direction_dim = layout
    # give every per-step hidden state the rank expected by the Y output
    state_layout_tensors = [
        transformer.make_unsqueeze(h_state, axes=[seq_length_dim, y_direction_dim])
        for h_state in state_h_tensors
    ]

    # use low-level node creation to attach results to the node's original outputs
    transformer.make_node(
        'Unsqueeze', [state_h_tensors[-1]], [outs[1]], axes=[y_h_direction_dim])
    transformer.make_node(
        'Unsqueeze', [state_c_tensor], [outs[2]], axes=[y_h_direction_dim])
    if direction == 'reverse':
        # emit Y in the original time order
        state_layout_tensors.reverse()
    transformer.make_node(
        'Concat', state_layout_tensors, [outs[0]], axis=seq_length_dim)
+
+
def _transform_bidirectional_LSTM(transformer, original_node, x, tensor_infos, activations,
                                 clip, hidden_size, layout):
    """Generate Bidirectional unrolled LSTM

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        original_node (onnx.onnx_ml_pb2.NodeProto): bidirectional LSTM operation to unroll
        x (list of str): list of input tensors (input tensor split along "time" dimension)
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to its shape and dtype info
        activations (list of str): list of length 6, containing names of forward and reverse activations
        clip (float or None): range which clips input of activations
        hidden_size (int): size of hidden state
        layout (int): See attribute description:
            https://github.com/onnx/onnx/blob/5cf5feef5ec3fd5527b2fdb6c29780e3b705059f/docs/Operators.md#attributes-37
    """

    inputs = original_node.input
    outputs = original_node.output

    # W and R carry a leading direction dimension of size 2:
    # index 0 holds the forward weights, index 1 the backward weights
    w = transformer.make_split(inputs[1], split_sizes=[1, 1], axis=0)
    r = transformer.make_split(inputs[2], split_sizes=[1, 1], axis=0)
    for d in range(2):
        w[d] = transformer.make_squeeze(w[d], axes=[0])
        r[d] = transformer.make_squeeze(r[d], axes=[0])

    # optional bias (an empty tensor name means the input is absent in ONNX)
    b = [None, None]
    if len(inputs) > 3 and inputs[3] != '':
        b = transformer.make_split(inputs[3], split_sizes=[1, 1], axis=0)
        for d in range(2):
            b[d] = transformer.make_squeeze(b[d], axes=[0])

    # optional initial hidden state, split per direction
    initial_h = [None, None]
    if len(inputs) > 5 and inputs[5] != '':
        direction_dim = layout
        initial_h = transformer.make_split(
            inputs[5], split_sizes=[1, 1], axis=direction_dim)
        for d in range(2):
            initial_h[d] = transformer.make_squeeze(initial_h[d], axes=[direction_dim])

    # optional initial cell state, split per direction
    initial_c = [None, None]
    if len(inputs) > 6 and inputs[6] != '':
        direction_dim = layout
        initial_c = transformer.make_split(
            inputs[6], split_sizes=[1, 1], axis=direction_dim)
        for d in range(2):
            initial_c[d] = transformer.make_squeeze(initial_c[d], axes=[direction_dim])

    # optional peephole weights, split per direction
    p = [None, None]
    if len(inputs) > 7 and inputs[7] != '':
        p = transformer.make_split(inputs[7], split_sizes=[1, 1], axis=0)
        for d in range(2):
            p[d] = transformer.make_squeeze(p[d], axes=[0])

    dtype = _dtype_to_np(tensor_infos[inputs[0]].dtype)
    # batch dimension index is 1 when layout == 0 and 0 when layout == 1
    batch_size = tensor_infos[inputs[0]].shape[1 - layout]

    # activation triples: act[0] for the forward pass, act[1] for the backward pass
    act = [{
        'f': activations[0],
        'g': activations[1],
        'h': activations[2]
    }, {
        'f': activations[3],
        'g': activations[4],
        'h': activations[5]
    }]

    # forward pass consumes x in natural time order ...
    state_f_h_tensors, state_f_c_tensor = _generate_one_direction_LSTM(
        transformer, x, w[0], r[0], b[0], initial_h[0], initial_c[0], p[0], clip, act[0],
        dtype, hidden_size, batch_size)
    # ... and the backward pass consumes it reversed (mutates the caller's list)
    x.reverse()
    state_b_h_tensors, state_b_c_tensor = _generate_one_direction_LSTM(
        transformer, x, w[1], r[1], b[1], initial_h[1], initial_c[1], p[1], clip, act[1],
        dtype, hidden_size, batch_size)
    # restore time order of backward states so they can be zipped with forward ones
    state_b_h_tensors.reverse()

    y_direction_dim = layout + 1
    y_c_direction_dim = layout
    state_layout_tensors = []
    seq_length_dim = layout
    # Y output: per time step, concatenate forward and backward hidden states
    # along the direction dimension
    for f_h_state, b_h_state in zip(state_f_h_tensors, state_b_h_tensors):
        state_f_layout_tensors = transformer.make_unsqueeze(
            f_h_state, axes=[seq_length_dim, y_direction_dim])
        state_b_layout_tensors = transformer.make_unsqueeze(
            b_h_state, axes=[seq_length_dim, y_direction_dim])
        state_layout_tensors += [
            transformer.make_concat(
                [state_f_layout_tensors, state_b_layout_tensors], axis=y_direction_dim)
        ]

    # Y_h output: last forward hidden state and first-in-time backward hidden state
    last_f_state_layout_tensor = transformer.make_unsqueeze(
        state_f_h_tensors[-1], axes=[y_c_direction_dim])
    last_b_state_layout_tensor = transformer.make_unsqueeze(
        state_b_h_tensors[0], axes=[y_c_direction_dim])

    Y_h = outputs[1]
    transformer.make_node(
        'Concat', [last_f_state_layout_tensor, last_b_state_layout_tensor], [Y_h],
        axis=y_c_direction_dim)

    # Y_c output: final cell states of both directions
    Y_f_c = transformer.make_unsqueeze(state_f_c_tensor, axes=[y_c_direction_dim])
    Y_b_c = transformer.make_unsqueeze(state_b_c_tensor, axes=[y_c_direction_dim])
    Y_c = outputs[2]
    transformer.make_node(
        'Concat', [Y_f_c, Y_b_c], [Y_c], axis=y_c_direction_dim)

    Y = outputs[0]
    transformer.make_node(
        'Concat', state_layout_tensors, [Y], axis=seq_length_dim)
+
+
def _legalize_LSTM(transformer, tensor_infos, node):
    """Unroll LSTM operation

    Args:
        transformer (_ModelTransformerHelper): transformation helper
        tensor_infos (dict from str to _TensorInfo): dict maps tensor name to its shape and dtype info
        node (onnx.onnx_ml_pb2.NodeProto): LSTM operation to unroll
    """
    inputs = node.input
    # inputs[4] is sequence_lens; per-sample lengths are not supported
    if len(inputs) > 4 and inputs[4] != '':
        raise NotImplementedError('Variadic length of output is not supported')

    # attribute defaults (see the ONNX LSTM specification)
    activation_alpha = []
    activation_beta = []
    activations = ['Sigmoid', 'Tanh', 'Tanh'] * 2
    clip = None
    direction = 'forward'
    hidden_size = 0
    input_forget = 0
    layout = 0

    for attr in node.attribute:
        attr_name = attr.name
        if attr_name == 'activation_alpha':
            activation_alpha = attr.floats
        elif attr_name == 'activation_beta':
            activation_beta = attr.floats
        elif attr_name == 'activations':
            activations = [item.decode('UTF-8') for item in attr.strings]
        elif attr_name == 'clip':
            clip = attr.f
        elif attr_name == 'direction':
            direction = attr.s.decode('UTF-8')
        elif attr_name == 'hidden_size':
            hidden_size = attr.i
        elif attr_name == 'input_forget':
            input_forget = attr.i
        elif attr_name == 'layout':
            layout = attr.i

    if len(activation_alpha) > 0 or len(activation_beta) > 0:
        raise NotImplementedError('Unsupported parameters for LSTM activations')

    for act_name in activations:
        if act_name not in ('Relu', 'Tanh', 'Sigmoid'):
            raise NotImplementedError('Unsupported activation function')

    if input_forget != 0:
        raise NotImplementedError('Unsupported input_forget attribute value')

    seq_length_dim = layout
    seq_length = tensor_infos[inputs[0]].shape[seq_length_dim]
    if hidden_size == 0:
        # fall back to the hidden size implied by the recurrence weights R
        hidden_size = tensor_infos[inputs[2]].shape[2]

    # split the input along the "time" dimension into per-step frames
    frame_tensors = transformer.make_split(
        inputs[0], split_sizes=[1] * seq_length, axis=seq_length_dim)
    x = [
        transformer.make_squeeze(frame, axes=[0]) for frame in frame_tensors
    ]

    if direction in ['forward', 'reverse']:
        _transform_unidirectional_LSTM(transformer, node, x, tensor_infos, activations,
                                      clip, direction, hidden_size, layout)
    elif direction == 'bidirectional':
        _transform_bidirectional_LSTM(transformer, node, x, tensor_infos, activations,
                                     clip, hidden_size, layout)
    else:
        raise RuntimeError('Unknown LSTM type')

    transformer.mark_for_deletion(node)
+
+
def legalize(model, options):
    """Replace selected operations in an onnx model

    Replaces operations, selected by the given options, with different operation
    sequences. For example, removes unsupported parts of a graph and substitutes
    sequences of supported operations.

    Note that the graph is changed in-place.

    Args:
        model (onnx.onnx_ml_pb2.ModelProto): target model
        options (LegalizeOptions): which transformations to apply
    """
    tensor_infos = _get_tensor_infos(model)

    transformer = _ModelTransformerHelper(model)

    # op_type -> (is this transformation enabled, legalization routine)
    handlers = {
        'RNN': (options.unroll_rnn, _legalize_RNN),
        'LSTM': (options.unroll_lstm, _legalize_LSTM),
    }

    node_id = 0
    while node_id < len(model.graph.node):
        node = model.graph.node[node_id]
        enabled, handler = handlers.get(node.op_type, (False, None))
        if enabled:
            # opset version is required by split operation
            if model.opset_import[0].version >= 13:
                raise NotImplementedError(
                    'Can not generate code with opcode version 13 and greater')
            transformer.set_insert_id(node_id)
            handler(transformer, tensor_infos, node)
            # skip over the nodes that were just inserted in place of this one
            node_id = transformer.get_insert_id()
        node_id += 1

    transformer.delete_marked_nodes()
+
+
if __name__ == '__main__':
    # Stand-alone utility mode: legalize a model file with all transformations on.
    if len(sys.argv) < 3:
        # fix: "funtionality" -> "functionality" in the usage message
        print('usage: ./legalize_onnx.py <path to input model> <path to output model>\n'
              '\n'
              '    In stand-alone utility mode this tool provides basic functionality\n'
              '    If you want to have more control over applied transformations, use this legalizer as a library')
        # sys.exit instead of the site-provided exit(): always available,
        # even when the interpreter runs without the site module
        sys.exit(1)
    options = LegalizeOptions()
    options.unroll_lstm = True
    options.unroll_rnn = True
    model = onnx.load(sys.argv[1])
    legalize(model, options)
    onnx.save(model, sys.argv[2])
index 6f9f2847e4bf5e303d6bc3af5f650a6cf86ba3c9..caea756c2b653f162334ef4988004ef2bd6f0878 100644 (file)
@@ -3,6 +3,7 @@
 # Gather test scripts
 file(GLOB TESTITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
 file(GLOB CONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.cfg")
+file(GLOB QCONFIGITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.qconf.json")
 
 # Create a script to run the tests at installation folder
 set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
@@ -39,6 +40,11 @@ foreach(CONFIGITEM IN ITEMS ${CONFIGITEMS})
   install(FILES ${CONFIGITEM} DESTINATION test)
 endforeach(CONFIGITEM)
 
+foreach(QCONFIGITEM IN ITEMS ${QCONFIGITEMS})
+  get_filename_component(ITEM_PREFIX ${QCONFIGITEM} NAME_WE)
+  install(FILES ${QCONFIGITEM} DESTINATION test)
+endforeach(QCONFIGITEM)
+
 file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
 
 file(APPEND "${DRIVER_SCRIPT}"
@@ -52,6 +58,8 @@ fi\n
 
 set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
 set(PREPROCESS_IMAGES_PY "${CMAKE_CURRENT_SOURCE_DIR}/preprocess_images.py")
+set(ONNX_LEGALIZE_RUN_COMPARE "${CMAKE_CURRENT_SOURCE_DIR}/onnx_legalize_run_compare.py")
+set(PRINT_ONNX_MODEL "${CMAKE_CURRENT_SOURCE_DIR}/print_onnx_model.py")
 
 install(FILES ${DRIVER_SCRIPT}
         PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
@@ -71,5 +79,23 @@ install(FILES ${PREPROCESS_IMAGES_PY}
                     WORLD_READ
         DESTINATION test)
 
+install(FILES ${ONNX_LEGALIZE_RUN_COMPARE}
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+        GROUP_READ GROUP_EXECUTE
+        WORLD_READ WORLD_EXECUTE
+        DESTINATION test)
+
+install(FILES ${PRINT_ONNX_MODEL}
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+        GROUP_READ GROUP_EXECUTE
+        WORLD_READ WORLD_EXECUTE
+        DESTINATION test)
+
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/README.txt
         DESTINATION test)
+
+add_subdirectory(onnx-operations)
+
+if(ENABLE_ONE_IMPORT_PYTORCH)
+  add_subdirectory(pytorch-operations)
+endif(ENABLE_ONE_IMPORT_PYTORCH)
diff --git a/compiler/one-cmds/tests/one-quantize_009.qconf.json b/compiler/one-cmds/tests/one-quantize_009.qconf.json
new file mode 100644 (file)
index 0000000..ac274e8
--- /dev/null
@@ -0,0 +1,36 @@
+{
+    "default_quantization_dtype" : "uint8",
+    "default_granularity" : "channel",
+    "layers" : [
+        {
+            "name" : "InceptionV3/InceptionV3/Conv2d_2b_3x3/Relu;InceptionV3/InceptionV3/Conv2d_2b_3x3/BatchNorm/FusedBatchNorm;InceptionV3/InceptionV3/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D;InceptionV3/InceptionV3/Conv2d_2b_3x3/Conv2D",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "InceptionV3/InceptionV3/MaxPool_5a_3x3/MaxPool",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "InceptionV3/InceptionV3/Mixed_5b/concat",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "InceptionV3/InceptionV3/Mixed_7c/concat",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "InceptionV3/Predictions/Reshape_1",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/one-cmds/tests/one-quantize_009.test b/compiler/one-cmds/tests/one-quantize_009.test
new file mode 100644 (file)
index 0000000..aa06703
--- /dev/null
@@ -0,0 +1,55 @@
#!/bin/bash

# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Positive test: quantize a float32 circle model to uint8 while overriding
# selected layers to int16 through a quantization configuration file
# (one-quantize_009.qconf.json).

filename_ext="$(basename -- $0)"
filename="${filename_ext%.*}"

trap_err_onexit()
{
  echo "${filename_ext} FAILED"
  exit 255
}

# any failing command from here on reports FAILED and exits with 255
trap trap_err_onexit ERR

inputfile="./inception_v3.circle"
outputfile="./inception_v3.random.quantized.mixed.circle"

rm -rf ${outputfile}

# to create inception_v3.circle
if [[ ! -s ${inputfile} ]]; then
  /bin/bash one-import_001.test > /dev/null 2>&1
  return_code=$?
  if [[ ${return_code} != 0 ]]; then
    trap_err_onexit
  fi
fi

# run test without input data
one-quantize \
--input_dtype float32 \
--quantized_dtype uint8 \
--granularity channel \
--quant_config one-quantize_009.qconf.json \
--input_path ${inputfile} \
--output_path ${outputfile} > /dev/null 2>&1

# fail when no (non-empty) quantized output was produced
if [[ ! -s "${outputfile}" ]]; then
  trap_err_onexit
fi

echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt b/compiler/one-cmds/tests/onnx-operations/CMakeLists.txt
new file mode 100644 (file)
index 0000000..e6b2b35
--- /dev/null
@@ -0,0 +1,86 @@
# Install one-cmds test scripts for onnx models

# Gather test scripts
set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples")
file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*")

set(TEST_DST test/onnx-operations)

# ship the PyTorchExamples resources next to the generated tests;
# prepare_test_materials.sh uses them to produce the .onnx inputs
install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}")

# options forwarded to one-import-onnx; exercise RNN/LSTM unrolling
set(ONNX_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm")

# one generated .test script per example: import <example>.onnx to circle
# and fail unless a non-empty output is produced
foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
  set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test")

  # generate test script
  file(WRITE  "${TEST_SCRIPT}" "#!/bin/bash\n\n")
  file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n")
  file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n")
  file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n")
  file(APPEND "${TEST_SCRIPT}" "{\n")
  file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} FAILED\"\n")
  file(APPEND "${TEST_SCRIPT}" "exit 255\n")
  file(APPEND "${TEST_SCRIPT}" "}\n")
  file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n")
  file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n")
  file(APPEND "${TEST_SCRIPT}" "one-import-onnx --input_path=${TEST_ITEM}.onnx --output_path=${TEST_ITEM}.circle\
    ${ONNX_IMPORT_OPTIONS} &> /dev/null\n")
  file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n")
  file(APPEND "${TEST_SCRIPT}" "trap_err_onexit\n")
  file(APPEND "${TEST_SCRIPT}" "fi\n")
  file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n")

  install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}")
endforeach(TEST_ITEM)


# Create a script to run the tests at installation folder
set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")

file(WRITE  "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n")
file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
file(APPEND "${DRIVER_SCRIPT}" "  USER_PATH=$1\n")
file(APPEND "${DRIVER_SCRIPT}" "  export PATH=$USER_PATH:$PATH\n")
file(APPEND "${DRIVER_SCRIPT}" "fi\n")
file(APPEND "${DRIVER_SCRIPT}" "\n")
file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")

# run every generated test, accumulating output in runtestall.log
foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
  file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n")
endforeach(TEST_ITEM)

file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")

file(APPEND "${DRIVER_SCRIPT}"
"if [[ $fail_count != 0 ]]; then
  echo \"$fail_count TESTS FAILED\"
  exit 255
else
  echo \"ALL TESTS PASSED!\"
fi\n
")

set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")

install(FILES "${DRIVER_SCRIPT}"
        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
                    GROUP_READ GROUP_EXECUTE
                    WORLD_READ WORLD_EXECUTE
        DESTINATION "${TEST_DST}")

install(FILES "${PREPARE_TEST_MATERIALS_SH}"
        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
                    GROUP_READ GROUP_EXECUTE
                    WORLD_READ WORLD_EXECUTE
        DESTINATION "${TEST_DST}")

install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
        DESTINATION "${TEST_DST}")
diff --git a/compiler/one-cmds/tests/onnx-operations/README.md b/compiler/one-cmds/tests/onnx-operations/README.md
new file mode 100644 (file)
index 0000000..928fb84
--- /dev/null
@@ -0,0 +1,28 @@
+## Overview
+
+This directory contains auxiliary tests for small ONNX target models.
+
+Most of the models contain a single operation, but some contain multiple operations that together represent one operation with complex semantics.
+
+Models for these tests are taken from res/PyTorchExamples.
+
+## To run all tests
+
+Steps:
+1) run 'one-prepare-venv' in bin folder to prepare python virtual-env with TensorFlow
+   - you need to run this only once
+   - read 'doc/how-to-prepare-virtualenv.txt' for more information
+   ```
+   bin/one-prepare-venv
+   ```
+2) run 'test/onnx-operations/prepare_test_materials.sh' to download test material models
+   - you need to run this only once
+   - you need internet connection to download files
+   - you may need to install 'wget' and 'unzip' packages
+   ```
+   test/onnx-operations/prepare_test_materials.sh
+   ```
+3) run 'test/onnx-operations/runtestall.sh' to run the test
+   ```
+   test/onnx-operations/runtestall.sh
+   ```
diff --git a/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/onnx-operations/prepare_test_materials.sh
new file mode 100644 (file)
index 0000000..274a60f
--- /dev/null
@@ -0,0 +1,26 @@
#!/bin/bash

# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generate the .onnx models consumed by the onnx-operations tests.
# NOTE(review): assumes ptem.py and examples/ exist in this directory
# (installed from res/PyTorchExamples) and that ptem.py writes its results
# into output/ -- confirm against the PyTorchExamples install step.

SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
pushd $SCRIPT_PATH > /dev/null

# export every example through ptem.py, one model per example directory
for test_case in examples/*; do
  python3 ptem.py $(basename ${test_case})
done

# place the generated models next to the *.test scripts
cp output/*.onnx .

popd > /dev/null
diff --git a/compiler/one-cmds/tests/onnx_legalize_run_compare.py b/compiler/one-cmds/tests/onnx_legalize_run_compare.py
new file mode 100644 (file)
index 0000000..9b02b74
--- /dev/null
@@ -0,0 +1,129 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnxruntime as rt
+import onnx
+import sys
+import numpy as np
+import importlib.util
+
+
def _generate_inputs(model):
    """Generate random inputs for given model

    Args:
        model (onnx.onnx_ml_pb2.ModelProto): target model

    Returns:
        dict from str to numpy.ndarray: generated inputs
    """
    generated = {}
    # list of accepted element types could be extended;
    # float32-only is a property of the current testsuite
    float_elem_type = onnx.TensorProto.DataType.Value("FLOAT")
    for graph_input in model.graph.input:
        assert (graph_input.type.tensor_type.elem_type == float_elem_type)
        shape = [dim.dim_value for dim in graph_input.type.tensor_type.shape.dim]
        generated[graph_input.name] = np.random.random(shape).astype(np.float32)
    return generated
+
+
def _run_model(model, inputs):
    """Run given model

    Args:
        model (onnx.onnx_ml_pb2.ModelProto): target model
        inputs (dict from str to numpy.ndarray): sample inputs

    Returns:
        list of numpy.ndarray: inference outputs
    """
    requested_outputs = [out.name for out in model.graph.output]
    session = rt.InferenceSession(model.SerializeToString())
    return session.run(requested_outputs, inputs)
+
+
def _compare_results(ref_outputs, test_outputs, tolerance):
    """Compare reference and test model outputs.

    Fixes over the previous revision:
    - docstring was a copy-paste of _generate_inputs ("Generate random inputs")
    - an all-zero reference output divided by zero, emitting a numpy
      RuntimeWarning; now handled explicitly with the same pass/fail outcome

    Args:
        ref_outputs (list of numpy.ndarray): reference values (original model results)
        test_outputs (list of numpy.ndarray): tested values (modified model results)
        tolerance (float): maximum acceptable relative difference

    Returns:
        bool: True if outputs considered equal, False otherwise
    """
    num_outputs = len(ref_outputs)
    assert (len(test_outputs) == num_outputs)
    for i in range(num_outputs):
        if ref_outputs[i].shape != test_outputs[i].shape:
            print("output {} shape mismatch: ref({}) vs test({})".format(
                i, ref_outputs[i].shape, test_outputs[i].shape))
            return False

        abs_difference = np.abs(ref_outputs[i] - test_outputs[i])
        abs_ref_maximum = np.abs(ref_outputs[i]).max()
        if abs_ref_maximum == 0.0:
            # all-zero reference: any nonzero difference is an infinite
            # relative error, an exact match is a zero relative error
            peak_error = float('inf') if abs_difference.max() > 0.0 else 0.0
        else:
            # peak absolute error relative to the largest reference magnitude
            peak_error = abs_difference.max() / abs_ref_maximum

        if peak_error > tolerance:
            print("output {} peak error to value ratio {} is too big".format(
                i, peak_error))
            return False
    return True
+
+
if __name__ == '__main__':
    # CLI driver: legalize a model, dump sample inputs, and verify the
    # legalized model produces outputs close to the original's.
    if len(sys.argv) < 6:
        exit('expecting 5 arguments:\n'
             '  - path to input model\n'
             '  - path to "legalized" model\n'
             '  - path to onnx_legalizer.py\n'
             '  - base name for generated test inputs\n'
             '  - output tolerance')
    (input_model_path, output_model_path, onnx_legalizer_path, input_dump_path,
     tolerance_str) = sys.argv[1:6]
    tolerance = float(tolerance_str)

    # load onnx_legalizer as a module from the path given on the command line
    legalizer_spec = importlib.util.spec_from_file_location("onnx_legalizer",
                                                            onnx_legalizer_path)
    onnx_legalizer = importlib.util.module_from_spec(legalizer_spec)
    legalizer_spec.loader.exec_module(onnx_legalizer)

    model = onnx.load(input_model_path)

    inputs = _generate_inputs(model)

    # persist the random inputs so failures can be reproduced
    for input_name, input_value in inputs.items():
        np.save('{}_{}.npy'.format(input_dump_path, input_name), input_value)

    ref_outputs = _run_model(model, inputs)

    options = onnx_legalizer.LegalizeOptions()
    options.unroll_rnn = True
    options.unroll_lstm = True
    onnx_legalizer.legalize(model, options)

    with open(output_model_path, 'wb') as f:
        f.write(model.SerializeToString())

    test_outputs = _run_model(model, inputs)

    if not _compare_results(ref_outputs, test_outputs, tolerance):
        exit('comparison failed')
index 7f269530cfd25d4e1ccf671df46c2469a7a6fb81..c80c5983496ce06b0e6f3624650fbebca47717b9 100644 (file)
@@ -91,6 +91,39 @@ if [[ ! -s "onnx_conv2d_conv2d.onnx" ]]; then
     # https://github.com/Samsung/ONE/issues/5577#issuecomment-755078444
 fi
 
+function files_missing() {
+    condition="test "
+
+    for f in "${@}"; do
+        condition="${condition} ! -s ${f} -o"
+    done
+
+    # last condition is always false to properly close last "or"
+    condition="${condition} -z non_zero_string "
+    ${condition}
+}
+
+declare -a TEST_RECCURENT_MODELS=(\
+  "RNN.onnx" "RNN-nobias.onnx" "RNN-relu.onnx" "RNN-bi.onnx" "RNN-noinit.onnx"\
+  "LSTM.onnx" "LSTM-bi.onnx" "LSTM-noinit.onnx" "LSTM-nobias.onnx"
+)
+
+if files_missing "${TEST_RECCURENT_MODELS[@]}"; then
+    rm -rf test_onnx_recurrent_models.zip
+    wget https://github.com/Samsung/ONE/files/8067909/test_onnx_recurrent_models.zip
+    unzip test_onnx_recurrent_models.zip
+    # https://github.com/Samsung/ONE/issues/8395#issuecomment-1040072097
+fi
+
+declare -a NEG_TEST_RECCURENT_MODELS=("rnn_variable.onnx" "lstm_variable.onnx")
+
+if files_missing "${NEG_TEST_RECCURENT_MODELS[@]}"; then
+    rm -rf neg_test_onnx_recurrent_models.zip
+    wget https://github.com/Samsung/ONE/files/8137183/neg_test_onnx_recurrent_models.zip
+    unzip neg_test_onnx_recurrent_models.zip
+    # https://github.com/Samsung/ONE/issues/8395#issuecomment-1050364375
+fi
+
 # prepare 'inception_v3.circle' file used for quantization test
 inputfile="./inception_v3.pb"
 outputfile="./inception_v3.circle"
diff --git a/compiler/one-cmds/tests/print_onnx_model.py b/compiler/one-cmds/tests/print_onnx_model.py
new file mode 100644 (file)
index 0000000..ecab0f6
--- /dev/null
@@ -0,0 +1,20 @@
+# Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import onnx
+import sys
+
+if __name__ == '__main__':
+    model = onnx.load(sys.argv[1])
+    print(model)
diff --git a/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt b/compiler/one-cmds/tests/pytorch-operations/CMakeLists.txt
new file mode 100644 (file)
index 0000000..10f30a5
--- /dev/null
@@ -0,0 +1,109 @@
+# Install one-cmds test scripts for pytorch models
+
+# Gather test scripts
+set(EXAMPLES_DIR "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/examples")
+file(GLOB TEST_EXAMPLES RELATIVE "${EXAMPLES_DIR}" "${EXAMPLES_DIR}/*")
+file(GLOB SPECIAL_TEST_ITEMS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "./*.test")
+
+set(TEST_DST test/pytorch-operations)
+
+install(DIRECTORY "${NNAS_PROJECT_SOURCE_DIR}/res/PyTorchExamples/" DESTINATION "${TEST_DST}")
+
+set(PYTORCH_IMPORT_OPTIONS "--unroll_rnn --unroll_lstm")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+  set(TEST_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/${TEST_ITEM}.test")
+
+  # generate test script
+  file(WRITE  "${TEST_SCRIPT}" "#!/bin/bash\n\n")
+  file(APPEND "${TEST_SCRIPT}" "filename_ext=\"\$(basename -- $0)\"\n")
+  file(APPEND "${TEST_SCRIPT}" "filename=\"\${filename_ext%.*}\"\n")
+  file(APPEND "${TEST_SCRIPT}" "trap_err_onexit()\n")
+  file(APPEND "${TEST_SCRIPT}" "{\n")
+  file(APPEND "${TEST_SCRIPT}" "  echo \"\${filename_ext} FAILED\"\n")
+  file(APPEND "${TEST_SCRIPT}" "  exit 255\n")
+  file(APPEND "${TEST_SCRIPT}" "}\n")
+  file(APPEND "${TEST_SCRIPT}" "trap trap_err_onexit ERR\n")
+  file(APPEND "${TEST_SCRIPT}" "outputfile=\"${TEST_ITEM}.circle\"\n")
+  file(APPEND "${TEST_SCRIPT}" "input_shapes=\$(head -n 1 ${TEST_ITEM}.spec)\n")
+  file(APPEND "${TEST_SCRIPT}" "input_types=\$(tail -n 1 ${TEST_ITEM}.spec)\n")
+  file(APPEND "${TEST_SCRIPT}" "one-import-pytorch --input_path=${TEST_ITEM}.pth --output_path=${TEST_ITEM}.circle\
+    ${PYTORCH_IMPORT_OPTIONS} --input_shapes=\${input_shapes} --input_types=\${input_types} &> /dev/null\n")
+  file(APPEND "${TEST_SCRIPT}" "if [[ ! -s \"\${outputfile}\" ]]; then\n")
+  file(APPEND "${TEST_SCRIPT}" "  trap_err_onexit\n")
+  file(APPEND "${TEST_SCRIPT}" "fi\n")
+  file(APPEND "${TEST_SCRIPT}" "echo \"\${filename_ext} SUCCESS\"\n")
+
+  install(FILES "${TEST_SCRIPT}" DESTINATION "${TEST_DST}")
+endforeach(TEST_ITEM)
+
+
+# Create a script to run the tests at installation folder
+set(DRIVER_SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/runtestall.sh")
+
+file(WRITE  "${DRIVER_SCRIPT}" "#!/bin/bash\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "SCRIPT_PATH=$(cd $(dirname \${BASH_SOURCE[0]}) && pwd)\n")
+file(APPEND "${DRIVER_SCRIPT}" "pushd $SCRIPT_PATH > /dev/null\n")
+file(APPEND "${DRIVER_SCRIPT}" "rm -rf runtestall.log\n")
+file(APPEND "${DRIVER_SCRIPT}" "export PATH=$SCRIPT_PATH/../bin:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "if [[ $# -ge 1 ]]; then\n")
+file(APPEND "${DRIVER_SCRIPT}" "  USER_PATH=$1\n")
+file(APPEND "${DRIVER_SCRIPT}" "  export PATH=$USER_PATH:$PATH\n")
+file(APPEND "${DRIVER_SCRIPT}" "fi\n")
+file(APPEND "${DRIVER_SCRIPT}" "\n")
+file(APPEND "${DRIVER_SCRIPT}" "# refer https://github.com/Samsung/ONE/issues/6286\n")
+file(APPEND "${DRIVER_SCRIPT}" "set -o pipefail\n\n")
+file(APPEND "${DRIVER_SCRIPT}" "fail_count=0\n")
+file(APPEND "${DRIVER_SCRIPT}" "trap \"(( fail_count++ ))\" ERR\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${TEST_EXAMPLES})
+  file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}.test\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "\necho \"special test items\" | tee -a runtestall.log\n\n")
+
+foreach(TEST_ITEM IN ITEMS ${SPECIAL_TEST_ITEMS})
+  file(APPEND "${DRIVER_SCRIPT}" "/bin/bash \"${TEST_ITEM}\" | tee -a runtestall.log\n")
+endforeach(TEST_ITEM)
+
+file(APPEND "${DRIVER_SCRIPT}" "popd > /dev/null\n\n")
+
+file(APPEND "${DRIVER_SCRIPT}"
+"if [[ $fail_count != 0 ]]; then
+  echo \"$fail_count TESTS FAILED\"
+  exit 255
+else
+  echo \"ALL TESTS PASSED!\"
+fi\n
+")
+
+set(PREPARE_TEST_MATERIALS_SH "${CMAKE_CURRENT_SOURCE_DIR}/prepare_test_materials.sh")
+set(EXAMPLE_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/example_generator.py")
+set(AUX_GENERATOR "${CMAKE_CURRENT_SOURCE_DIR}/aux_generator.py")
+
+install(FILES "${DRIVER_SCRIPT}"
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+                    GROUP_READ GROUP_EXECUTE
+                    WORLD_READ WORLD_EXECUTE
+        DESTINATION "${TEST_DST}")
+
+install(FILES "${PREPARE_TEST_MATERIALS_SH}"
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+                    GROUP_READ GROUP_EXECUTE
+                    WORLD_READ WORLD_EXECUTE
+        DESTINATION "${TEST_DST}")
+
+install(FILES "${EXAMPLE_GENERATOR}" "${AUX_GENERATOR}"
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+        GROUP_READ GROUP_EXECUTE
+        WORLD_READ WORLD_EXECUTE
+        DESTINATION "${TEST_DST}")
+
+install(FILES ${SPECIAL_TEST_ITEMS}
+        PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE
+        GROUP_READ GROUP_EXECUTE
+        WORLD_READ WORLD_EXECUTE
+        DESTINATION "${TEST_DST}")
+
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+        DESTINATION "${TEST_DST}")
diff --git a/compiler/one-cmds/tests/pytorch-operations/README.md b/compiler/one-cmds/tests/pytorch-operations/README.md
new file mode 100644 (file)
index 0000000..231a10e
--- /dev/null
@@ -0,0 +1,28 @@
+## Overview 
+
+This directory contains auxiliary tests for small pytorch target models.
+
+Most of the models contain a single operation, but some contain multiple operations that together represent one operation with complex semantics.
+
+Models for these tests are taken from res/PyTorchExamples.
+
+## To run all tests
+
+Steps:
+1) run 'one-prepare-venv' in bin folder to prepare python virtual-env with TensorFlow
+   - you need to run this only once
+   - read 'doc/how-to-prepare-virtualenv.txt' for more information
+   ```
+   bin/one-prepare-venv
+   ```
+2) run 'test/pytorch-operations/prepare_test_materials.sh' to download test material models
+   - you need to run this only once
+   - you need internet connection to download files
+   - you may need to install 'wget' and 'unzip' packages
+   ```
+   test/pytorch-operations/prepare_test_materials.sh
+   ```
+3) run 'test/pytorch-operations/runtestall.sh' to run the test
+   ```
+   test/pytorch-operations/runtestall.sh
+   ```
diff --git a/compiler/one-cmds/tests/pytorch-operations/aux_generator.py b/compiler/one-cmds/tests/pytorch-operations/aux_generator.py
new file mode 100644 (file)
index 0000000..6c9afcd
--- /dev/null
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch aux tests generator
+
+import torch
+import torch.nn as nn
+import json
+import zipfile
+import os
+
+
+# model
+class net_abs(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input):
+        return torch.abs(input)
+
+
+if __name__ == '__main__':
+    model = net_abs()
+    # save "entire" model for entire_model.test
+    torch.save(model, 'entire_model.pth')
+
+    # save state_dict file for state_dict_model.test
+    state_dict_path = 'state_dict_model.pth'
+    torch.save(model.state_dict(), state_dict_path)
+
+    # create files for mar_torchscript_model.test
+    torchscript_path = 'torchscript_model.pth'
+    inp = torch.randn(1, 2, 3, 3)
+    traced_model = torch.jit.trace(model, inp)
+    torch.jit.save(traced_model, torchscript_path)
+    # create manifest
+    manifest = {}
+    manifest['createdOn'] = '11/11/1111 11:11:11'
+    manifest['runtime'] = 'python'
+    manifest['model'] = {}
+    manifest['model']['modelName'] = 'torchscript_model'
+    manifest['model']['serializedFile'] = torchscript_path
+    manifest['model']['handler'] = 'image_classifier'
+    manifest['model']['modelVersion'] = '1.0'
+    manifest['archiverVersion'] = '0.4.2'
+
+    with zipfile.ZipFile('mar_torchscript_model.mar', 'w') as mar_file:
+        with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file:
+            manifest_file.write(json.dumps(manifest).encode())
+        mar_file.write(torchscript_path)
+
+    # create files for mar_state_dict_model.test
+    model_file_path = os.path.basename(__file__)
+    # create manifest
+    manifest = {}
+    manifest['createdOn'] = '11/11/1111 11:11:11'
+    manifest['runtime'] = 'python'
+    manifest['model'] = {}
+    manifest['model']['modelName'] = 'state_dict_model'
+    manifest['model']['serializedFile'] = state_dict_path
+    manifest['model']['handler'] = 'image_classifier'
+    manifest['model']['modelFile'] = model_file_path
+    manifest['model']['modelVersion'] = '1.0'
+    manifest['archiverVersion'] = '0.4.2'
+
+    with zipfile.ZipFile('mar_state_dict_model.mar', 'w') as mar_file:
+        with mar_file.open('MAR-INF/MANIFEST.json', 'w') as manifest_file:
+            manifest_file.write(json.dumps(manifest).encode())
+        mar_file.write(state_dict_path)
+        mar_file.write(model_file_path)
diff --git a/compiler/one-cmds/tests/pytorch-operations/entire_model.test b/compiler/one-cmds/tests/pytorch-operations/entire_model.test
new file mode 100644 (file)
index 0000000..a72a56f
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch ability to import NN model stored in python file and serialized "entire" model.
+# "Entire" model is serialized with `torch.save(model)` method.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="entire_model.circle"
+
+# run test
+one-import-pytorch --input_path=entire_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/example_generator.py b/compiler/one-cmds/tests/pytorch-operations/example_generator.py
new file mode 100644 (file)
index 0000000..20a80c8
--- /dev/null
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# PyTorch Example manager
+
+import torch
+import importlib
+import argparse
+import os
+
+from pathlib import Path
+
+print("PyTorch version=", torch.__version__)
+
+parser = argparse.ArgumentParser(description='Process PyTorch python examples')
+
+parser.add_argument('examples', metavar='EXAMPLES', nargs='+')
+
+args = parser.parse_args()
+
+output_folder = "./"
+
+Path(output_folder).mkdir(parents=True, exist_ok=True)
+
+
+class JitWrapper(torch.nn.Module):
+    def __init__(self, model):
+        super().__init__()
+        self.model = model
+
+    def forward(self, *args):
+        if len(args) == 1:
+            return self.model.forward(args[0])
+        else:
+            return self.model.forward(args)
+
+
+for example in args.examples:
+    print("Generate '" + example + ".pth'", end='')
+    # load example code
+    # replace - with _ in name, otherwise pytorch generates invalid torchscript
+    module_name = "examples." + example.replace('-', '_')
+    module_loader = importlib.machinery.SourceFileLoader(
+        module_name, os.path.join("examples", example, "__init__.py"))
+    module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+    module = importlib.util.module_from_spec(module_spec)
+    module_loader.exec_module(module)
+
+    jittable_model = JitWrapper(module._model_)
+
+    traced_model = torch.jit.trace(jittable_model, module._dummy_)
+    # save .pth
+    torch.jit.save(traced_model, output_folder + example + ".pth")
+
+    input_shapes = ""
+    input_types = ""
+
+    input_samples = module._dummy_
+    if isinstance(input_samples, torch.Tensor):
+        input_samples = [input_samples]
+    for inp_idx in range(len(input_samples)):
+        input_data = input_samples[inp_idx]
+
+        shape = input_data.shape
+        for dim in range(len(shape)):
+            input_shapes += str(shape[dim])
+            if dim != len(shape) - 1:
+                input_shapes += ","
+
+        if input_data.dtype == torch.bool:
+            input_types += "bool"
+        elif input_data.dtype == torch.uint8:
+            input_types += "uint8"
+        elif input_data.dtype == torch.int8:
+            input_types += "int8"
+        elif input_data.dtype == torch.int16:
+            input_types += "int16"
+        elif input_data.dtype == torch.int32:
+            input_types += "int32"
+        elif input_data.dtype == torch.int64:
+            input_types += "int64"
+        elif input_data.dtype == torch.float16:
+            input_types += "float32"
+        elif input_data.dtype == torch.float32:
+            input_types += "float32"
+        elif input_data.dtype == torch.float64:
+            input_types += "float64"
+        elif input_data.dtype == torch.complex64:
+            input_types += "complex64"
+        elif input_data.dtype == torch.complex128:
+            input_types += "complex128"
+        else:
+            raise ValueError('unsupported dtype')
+
+        if inp_idx != len(input_samples) - 1:
+            input_shapes += ":"
+            input_types += ","
+
+    with open(example + ".spec", "w") as spec_file:
+        print(input_shapes, file=spec_file)
+        print(input_types, file=spec_file)
+
+    print(" - Done")
diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_state_dict_model.test
new file mode 100644 (file)
index 0000000..9892dbb
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch ability to import .mar file.
+# .mar file contains python source of the model and serialized state_dict.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="mar_state_dict_model.circle"
+
+# run test
+one-import-pytorch --input_path=mar_state_dict_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/mar_torchscript_model.test
new file mode 100644 (file)
index 0000000..3ac38a4
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch ability to import .mar file.
+# .mar file contains TorchScript.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="mar_torchscript_model.circle"
+
+# run test
+one-import-pytorch --input_path=mar_torchscript_model.mar --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh b/compiler/one-cmds/tests/pytorch-operations/prepare_test_materials.sh
new file mode 100644 (file)
index 0000000..5f38610
--- /dev/null
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SCRIPT_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+pushd $SCRIPT_PATH > /dev/null
+
+for test_case in examples/*; do
+  python3 example_generator.py $(basename ${test_case})
+done
+
+python3 aux_generator.py
+
+popd > /dev/null
diff --git a/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test b/compiler/one-cmds/tests/pytorch-operations/state_dict_model.test
new file mode 100644 (file)
index 0000000..ecd2a81
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch ability to import NN model from .py file and serialized state_dict file.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="state_dict_model.circle"
+
+# run test
+one-import-pytorch --input_path=state_dict_model.pth --python_path=aux_generator.py --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test b/compiler/one-cmds/tests/pytorch-operations/torchscript_model.test
new file mode 100644 (file)
index 0000000..590e5b3
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Test one-import-pytorch ability to import TorchScript file.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+outputfile="torchscript_model.circle"
+
+# run test
+one-import-pytorch --input_path=torchscript_model.pth --output_path=${outputfile} --input_shapes=1,2,3,3 --input_types=float32 &> /dev/null
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
index 5d84c2bd56481bd01b69a49e87a41b8647d08acc..be0322acabc5890a2ce0a93c0618f2f524529b6f 100644 (file)
 import argparse
 import configparser
 import glob
+import importlib
 import ntpath
 import os
 import subprocess
 import sys
 
-
-class _CONSTANT:
-    __slots__ = ()  # This prevents access via __dict__.
-    OPTIMIZATION_OPTS = (
-        # (OPTION_NAME, HELP_MESSAGE)
-        ('O1', 'enable O1 optimization pass'),
-        ('convert_nchw_to_nhwc',
-         'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
-         ),
-        ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
-        ('nchw_to_nhwc_input_shape',
-         'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
-        ('nchw_to_nhwc_output_shape',
-         'convert the output shape of the model (argument for convert_nchw_to_nhwc)'),
-        ('fold_add_v2', 'fold AddV2 op with constant inputs'),
-        ('fold_cast', 'fold Cast op with constant input'),
-        ('fold_dequantize', 'fold Dequantize op'),
-        ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
-        ('fold_sparse_to_dense', 'fold SparseToDense op'),
-        ('forward_reshape_to_unaryop', 'Forward Reshape op'),
-        ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
-        ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
-        ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
-        ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
-        ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
-        ('fuse_bcq', 'apply Binary Coded Quantization'),
-        ('fuse_preactivation_batchnorm',
-         'fuse BatchNorm operators of pre-activations to Convolution op'),
-        ('fuse_mean_with_mean', 'fuse two consecutive Mean ops'),
-        ('fuse_transpose_with_mean',
-         'fuse Mean with a preceding Transpose under certain conditions'),
-        ('make_batchnorm_gamma_positive',
-         'make negative gamma of BatchNorm to a small positive value (1e-10).'
-         ' Note that this pass can change the execution result of the model.'
-         ' So, use it only when the impact is known to be acceptable.'),
-        ('fuse_activation_function', 'fuse Activation function to a preceding operator'),
-        ('fuse_instnorm', 'fuse ops to InstanceNorm operator'),
-        ('replace_cw_mul_add_with_depthwise_conv',
-         'replace channel-wise Mul/Add with DepthwiseConv2D'),
-        ('remove_fakequant', 'remove FakeQuant ops'),
-        ('remove_quantdequant', 'remove Quantize-Dequantize sequence'),
-        ('remove_redundant_reshape', 'fuse or remove subsequent Reshape ops'),
-        ('remove_redundant_transpose', 'fuse or remove subsequent Transpose ops'),
-        ('remove_unnecessary_reshape', 'remove unnecessary reshape ops'),
-        ('remove_unnecessary_slice', 'remove unnecessary slice ops'),
-        ('remove_unnecessary_strided_slice', 'remove unnecessary strided slice ops'),
-        ('remove_unnecessary_split', 'remove unnecessary split ops'),
-        ('resolve_customop_add', 'convert Custom(Add) op to Add op'),
-        ('resolve_customop_batchmatmul',
-         'convert Custom(BatchMatmul) op to BatchMatmul op'),
-        ('resolve_customop_matmul', 'convert Custom(Matmul) op to Matmul op'),
-        ('resolve_customop_max_pool_with_argmax',
-         'convert Custom(MaxPoolWithArgmax) to net of builtin operators'),
-        ('shuffle_weight_to_16x1float32',
-         'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
-         ' Note that it only converts weights whose row is a multiple of 16'),
-        ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
-        ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'),
-        ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'),
-        ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
-        ('substitute_strided_slice_to_reshape',
-         'convert certain condition StridedSlice to Reshape'),
-        ('substitute_transpose_to_reshape',
-         'convert certain condition Transpose to Reshape'),
-        ('transform_min_max_to_relu6', 'transform Minimum-Maximum pattern to Relu6 op'),
-        ('transform_min_relu_to_relu6', 'transform Minimum(6)-Relu pattern to Relu6 op'))
-
-
-_CONSTANT = _CONSTANT()
+import onelib.constant as _constant
 
 
 def _add_default_arg(parser):
@@ -116,7 +49,10 @@ def _add_default_arg(parser):
 
 def is_accumulated_arg(arg, driver):
     if driver == "one-quantize":
-        if arg == "tensor_name" or arg == "scale" or arg == "zero_point":
+        accumulables = [
+            "tensor_name", "scale", "zero_point", "src_tensor_name", "dst_tensor_name"
+        ]
+        if arg in accumulables:
             return True
 
     return False
@@ -189,83 +125,6 @@ def _parse_cfg(args, driver_name):
                     setattr(args, key, config[secton_to_run][key])
 
 
-def _make_tf2tfliteV2_cmd(args, driver_path, input_path, output_path):
-    """make a command for running tf2tfliteV2.py"""
-    cmd = [sys.executable, os.path.expanduser(driver_path)]
-    # verbose
-    if _is_valid_attr(args, 'verbose'):
-        cmd.append('--verbose')
-    # model_format
-    if _is_valid_attr(args, 'model_format_cmd'):
-        cmd.append(getattr(args, 'model_format_cmd'))
-    elif _is_valid_attr(args, 'model_format'):
-        cmd.append('--' + getattr(args, 'model_format'))
-    else:
-        cmd.append('--graph_def')  # default value
-    # converter version
-    if _is_valid_attr(args, 'converter_version_cmd'):
-        cmd.append(getattr(args, 'converter_version_cmd'))
-    elif _is_valid_attr(args, 'converter_version'):
-        cmd.append('--' + getattr(args, 'converter_version'))
-    else:
-        cmd.append('--v1')  # default value
-    # input_path
-    if _is_valid_attr(args, 'input_path'):
-        cmd.append('--input_path')
-        cmd.append(os.path.expanduser(input_path))
-    # output_path
-    if _is_valid_attr(args, 'output_path'):
-        cmd.append('--output_path')
-        cmd.append(os.path.expanduser(output_path))
-    # input_arrays
-    if _is_valid_attr(args, 'input_arrays'):
-        cmd.append('--input_arrays')
-        cmd.append(getattr(args, 'input_arrays'))
-    # input_shapes
-    if _is_valid_attr(args, 'input_shapes'):
-        cmd.append('--input_shapes')
-        cmd.append(getattr(args, 'input_shapes'))
-    # output_arrays
-    if _is_valid_attr(args, 'output_arrays'):
-        cmd.append('--output_arrays')
-        cmd.append(getattr(args, 'output_arrays'))
-
-    return cmd
-
-
-def _make_tflite2circle_cmd(driver_path, input_path, output_path):
-    """make a command for running tflite2circle"""
-    cmd = [driver_path, input_path, output_path]
-    return [os.path.expanduser(c) for c in cmd]
-
-
-def _make_circle2circle_cmd(args, driver_path, input_path, output_path):
-    """make a command for running circle2circle"""
-    cmd = [os.path.expanduser(c) for c in [driver_path, input_path, output_path]]
-    # profiling
-    if _is_valid_attr(args, 'generate_profile_data'):
-        cmd.append('--generate_profile_data')
-    # optimization pass(only true/false options)
-    # TODO support options whose number of arguments is more than zero
-    for opt in _CONSTANT.OPTIMIZATION_OPTS:
-        if _is_valid_attr(args, opt[0]):
-            # ./driver --opt[0]
-            if type(getattr(args, opt[0])) is bool:
-                cmd.append('--' + opt[0])
-            """
-            This condition check is for config file interface, usually would be
-             SomeOption=True
-            but user can write as follows while development
-             SomeOption=False
-            instead of removing SomeOption option
-            """
-            if type(getattr(args, opt[0])) is str and not getattr(
-                    args, opt[0]).lower() in ['false', '0', 'n']:
-                cmd.append('--' + opt[0])
-
-    return cmd
-
-
 def _print_version_and_exit(file_path):
     """print version of the file located in the file_path"""
     script_path = os.path.realpath(file_path)
@@ -368,3 +227,34 @@ def _get_optimization_list(get_name=False):
         opt_list = [_remove_suffix(s, '.cfg') for s in opt_list]
 
     return opt_list
+
+
+def _detect_one_import_drivers(search_path):
+    """Looks for import drivers in given directory
+
+    Args:
+        search_path: path to the directory where to search import drivers
+
+    Returns:
+    dict: each entry is related to single detected driver,
+          key is a config section name, value is a driver name
+
+    """
+    import_drivers_dict = {}
+    for module_name in os.listdir(search_path):
+        full_path = os.path.join(search_path, module_name)
+        if not os.path.isfile(full_path):
+            continue
+        if module_name.find("one-import-") != 0:
+            continue
+        module_loader = importlib.machinery.SourceFileLoader(module_name, full_path)
+        module_spec = importlib.util.spec_from_loader(module_name, module_loader)
+        module = importlib.util.module_from_spec(module_spec)
+        try:
+            module_loader.exec_module(module)
+            if hasattr(module, "get_driver_cfg_section"):
+                section = module.get_driver_cfg_section()
+                import_drivers_dict[section] = module_name
+        except:
+            pass
+    return import_drivers_dict
index 418bc27ac4bf3d426265624b88e5a42ecbeaeabe..951194d9d2b5634ea0856d6f90d01a07a8b61544 100644 (file)
@@ -22,11 +22,11 @@ target_link_libraries(moco_onnx_frontend PUBLIC moco_onnx_proto)
 target_link_libraries(moco_onnx_frontend PUBLIC loco)
 target_link_libraries(moco_onnx_frontend PRIVATE cwrap)
 
-nnas_find_package(GTest QUIET)
-
-if(NOT GTest_FOUND)
+if(NOT ENABLE_TEST)
   return()
-endif(NOT GTest_FOUND)
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest QUIET)
 
 add_executable(moco_onnx_frontend_test ${TESTS})
 target_include_directories(moco_onnx_frontend_test PRIVATE src)
index 5f87e948888045769eda3bfec2e13bdd64c3a852..bcc07f48220ff2827fcdcad9a7bcbd4c7f8114df 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(pepper_strcast STATIC ${SOURCES})
-set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(pepper_strcast PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(pepper_strcast PUBLIC include)
 target_link_libraries(pepper_strcast PRIVATE nncc_common)
 target_link_libraries(pepper_strcast PUBLIC nncc_coverage)
index 00ffb57dead3c4a9642c92f33ac6b03b2a490bf6..51fd9a391bd45066efba2a729548d361d82ef5ad 100644 (file)
@@ -1,7 +1,9 @@
 unset(QUANTIZATION_VALUE_TEST)
 unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
+unset(QUANTIZATION_CONFIG_VALUE_TEST)
+unset(QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM)
 
-nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 if(NOT FlatBuffers_FOUND)
   message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
   return()
@@ -12,6 +14,11 @@ macro(addTest NAME GRANULARITY DTYPE)
   list(APPEND QUANTIZATION_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
 endmacro(addTest)
 
+macro(addQConfTest NAME GRANULARITY DTYPE)
+  list(APPEND QUANTIZATION_CONFIG_VALUE_TEST ${NAME})
+  list(APPEND QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM ${NAME} ${GRANULARITY} ${DTYPE})
+endmacro(addQConfTest)
+
 # Read "test.lst"
 include("test.lst")
 # Read "test.local.lst" if exists
@@ -20,12 +27,12 @@ include("test.local.lst" OPTIONAL)
 unset(TEST_DEPS)
 
 get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
-get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR)
+get_target_property(SCHEMA_BIN_PATH mio_circle04 BINARY_DIR)
 
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
                "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
 
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_6_0")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
 
 ###
 ### Generate test.config
@@ -89,5 +96,22 @@ add_test(
           ${QUANTIZATION_VALUE_TEST_WITH_PARAM}
 )
 
+add_test(
+  NAME pota_fake_wquant_test_with_config
+  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_fake_wquant_with_config.sh"
+          "${TEST_CONFIG}"
+          "${ARTIFACTS_BIN_PATH}"
+          ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM}
+)
+
+add_test(
+  NAME pota_quantization_test_with_config
+  COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/test_quantization_with_config.sh"
+          "${TEST_CONFIG}"
+          "${ARTIFACTS_BIN_PATH}"
+          ${QUANTIZATION_CONFIG_VALUE_TEST_WITH_PARAM}
+)
+
 set_tests_properties(pota_record_minmax_test PROPERTIES DEPENDS pota_fake_wquant_test)
 set_tests_properties(pota_quantization_test PROPERTIES DEPENDS pota_record_minmax_test)
+set_tests_properties(pota_quantization_test_with_config PROPERTIES DEPENDS pota_fake_wquant_test_with_config)
diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Add_002/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/AveragePool2D_000/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Concatenation_001/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Conv2D_004/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/DepthwiseConv2D_002/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..174d6e9
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "out",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/FullyConnected_003/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..733f46e
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "out",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/InstanceNorm_001/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/MaxPool2D_000/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mean_000/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Mul_001/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/PRelu_001/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/ReLU_000/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..630c3e4
--- /dev/null
@@ -0,0 +1,14 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm1",
+            "dtype" : "uint8",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "ofm2",
+            "dtype" : "uint8",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/Split_000/channel/uint8/qconf.json
new file mode 100644 (file)
index 0000000..cc98d7c
--- /dev/null
@@ -0,0 +1,14 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm1",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "ofm2",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/channel/int16/qconf.json
new file mode 100644 (file)
index 0000000..838b331
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "uint8",
+            "granularity" : "layer"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json b/compiler/pota-quantization-value-test/config_files/TransposeConv_001/layer/uint8/qconf.json
new file mode 100644 (file)
index 0000000..7cd6ce7
--- /dev/null
@@ -0,0 +1,9 @@
+{
+    "layers" : [
+        {
+            "name" : "ofm",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644 (file)
index 0000000..a223fa4
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.038489170372486115,
+  "zero_point": 129.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ifm2.json
new file mode 100644 (file)
index 0000000..ec6082d
--- /dev/null
@@ -0,0 +1,32 @@
+{
+  "weights": [
+    [
+      [
+        [
+          136,
+          153,
+          68
+        ],
+        [
+          51,
+          34,
+          221
+        ]
+      ],
+      [
+        [
+          0,
+          255,
+          187
+        ],
+        [
+          85,
+          170,
+          102
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.05882352963089943,
+  "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..afa9b1a
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0892433300614357,
+  "zero_point": 134.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644 (file)
index 0000000..a7298cb
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014653272228315473,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ifm2.json
new file mode 100644 (file)
index 0000000..ab968c9
--- /dev/null
@@ -0,0 +1,32 @@
+{
+  "weights": [
+    [
+      [
+        [
+          4096,
+          8192,
+          -12288
+        ],
+        [
+          -16384,
+          -20479,
+          24575
+        ]
+      ],
+      [
+        [
+          -28671,
+          32767,
+          16384
+        ],
+        [
+          -8192,
+          12288,
+          -4096
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.0002441480755805969,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Add_002_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..3cb0552
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00037035736022517085,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..0528cc9
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03911808878183365,
+  "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..ac5da0b
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.027372928336262703,
+  "zero_point": 141.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..353f15a
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0001523942337371409,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/AveragePool2D_000_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..c4ace78
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00012122748012188822,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644 (file)
index 0000000..5228806
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.05882352963089943,
+  "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ifm2.json
new file mode 100644 (file)
index 0000000..17ba253
--- /dev/null
@@ -0,0 +1,28 @@
+{
+  "weights": [
+    [
+      [
+        [
+          136,
+          153
+        ],
+        [
+          68,
+          51
+        ]
+      ],
+      [
+        [
+          34,
+          221
+        ],
+        [
+          0,
+          255
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.05882352963089943,
+  "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..5228806
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.05882352963089943,
+  "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644 (file)
index 0000000..71265a2
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0002441480755805969,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ifm2.json
new file mode 100644 (file)
index 0000000..53d7cdb
--- /dev/null
@@ -0,0 +1,28 @@
+{
+  "weights": [
+    [
+      [
+        [
+          4096,
+          8192
+        ],
+        [
+          -12288,
+          -16384
+        ]
+      ],
+      [
+        [
+          -20479,
+          24575
+        ],
+        [
+          -28671,
+          32767
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.0002441480755805969,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Concatenation_001_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..71265a2
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0002441480755805969,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..2558bb2
--- /dev/null
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.0039215087890625,
+          2.007843017578125
+        ],
+        [
+          -3.0117650032043457,
+          -4.015686511993408
+        ]
+      ],
+      [
+        [
+          -5.019608497619629,
+          6.023530006408691
+        ],
+        [
+          -7.027451515197754,
+          7.9686279296875
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          4.01568603515625,
+          -2.007843494415283
+        ],
+        [
+          3.0117645263671875,
+          -1.0039215087890625
+        ]
+      ],
+      [
+        [
+          -7.9686279296875,
+          -6.023530006408691
+        ],
+        [
+          7.027451515197754,
+          5.019608497619629
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/bias.json
new file mode 100644 (file)
index 0000000..50d44ec
--- /dev/null
@@ -0,0 +1,7 @@
+{
+  "weights": [
+    4069,
+    8138
+  ],
+  "scale": 0.0002457468386200985
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..2450886
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.003916590008884668,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ker.json
new file mode 100644 (file)
index 0000000..b249a0c
--- /dev/null
@@ -0,0 +1,52 @@
+{
+  "weights": [
+    [
+      [
+        [
+          143,
+          159
+        ],
+        [
+          79,
+          63
+        ]
+      ],
+      [
+        [
+          47,
+          223
+        ],
+        [
+          15,
+          254
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          191,
+          95
+        ],
+        [
+          175,
+          111
+        ]
+      ],
+      [
+        [
+          0,
+          31
+        ],
+        [
+          239,
+          207
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.062745101749897,
+  "zero_point": 127.0,
+  "min": -7.9686279296875,
+  "max": 8.031373023986816
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..a2dd668
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.037479765713214874,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..8817cbe
--- /dev/null
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.000030517578125,
+          2.00006103515625
+        ],
+        [
+          -3.000091552734375,
+          -4.0001220703125
+        ]
+      ],
+      [
+        [
+          -4.999908447265625,
+          5.99993896484375
+        ],
+        [
+          -6.999969482421875,
+          8.0
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          4.0001220703125,
+          -2.00006103515625
+        ],
+        [
+          3.000091552734375,
+          -1.000030517578125
+        ]
+      ],
+      [
+        [
+          -8.0,
+          -5.99993896484375
+        ],
+        [
+          6.999969482421875,
+          4.999908447265625
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..b00d8d2
--- /dev/null
@@ -0,0 +1,10 @@
+{
+  "weights": [
+    26925029,
+    53850057
+  ],
+  "scale": [
+    3.714016479907864e-08,
+    3.714016479907864e-08
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..df5d06c
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00015212147263810039,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..94c794f
--- /dev/null
@@ -0,0 +1,61 @@
+{
+  "weights": [
+    [
+      [
+        [
+          4096,
+          8192
+        ],
+        [
+          -12288,
+          -16384
+        ]
+      ],
+      [
+        [
+          -20479,
+          24575
+        ],
+        [
+          -28671,
+          32767
+        ]
+      ]
+    ],
+    [
+      [
+        [
+          16384,
+          -8192
+        ],
+        [
+          12288,
+          -4096
+        ]
+      ],
+      [
+        [
+          -32767,
+          -24575
+        ],
+        [
+          28671,
+          20479
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.00024414807580797754,
+    0.00024414807580797754
+  ],
+  "zero_point": 0.0,
+  "min": [
+    -8.0,
+    -8.0
+  ],
+  "max": [
+    8.0,
+    8.0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Conv2D_004_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..e02eeb9
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.002048635622486472,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..cd34797
--- /dev/null
@@ -0,0 +1,34 @@
+{
+  "weights": [
+    [
+      [
+        [
+          0.9725494384765625,
+          1.945098876953125,
+          3.039216995239258,
+          4.0117645263671875
+        ],
+        [
+          -8.996077537536621,
+          9.9686279296875,
+          -10.94117546081543,
+          12.035295486450195
+        ]
+      ],
+      [
+        [
+          4.98431396484375,
+          5.9568634033203125,
+          7.050981521606445,
+          8.023530960083008
+        ],
+        [
+          13.007843017578125,
+          -13.980391502380371,
+          14.95294189453125,
+          -16.04705810546875
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..e60ff31
--- /dev/null
@@ -0,0 +1,9 @@
+{
+  "weights": [
+    2156,
+    4312,
+    6468,
+    8624
+  ],
+  "scale": 0.0004638272181067826
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..4ec4ef2
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0038153529167175293,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..01835fb
--- /dev/null
@@ -0,0 +1,38 @@
+{
+  "weights": [
+    [
+      [
+        [
+          140,
+          148,
+          157,
+          165
+        ],
+        [
+          58,
+          214,
+          42,
+          231
+        ]
+      ],
+      [
+        [
+          173,
+          181,
+          190,
+          198
+        ],
+        [
+          239,
+          17,
+          255,
+          0
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.12156862765550613,
+  "zero_point": 132.0,
+  "min": -16.04705810546875,
+  "max": 14.952940940856934
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/channel/int16/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..39c64f3
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.07362665981054306,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..20c1f67
--- /dev/null
@@ -0,0 +1,34 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1.00018310546875,
+          2.0,
+          2.99981689453125,
+          4.0001220703125
+        ],
+        [
+          -9.00006103515625,
+          10.0,
+          -10.99993896484375,
+          11.9998779296875
+        ]
+      ],
+      [
+        [
+          5.0001220703125,
+          6.0,
+          6.9998779296875,
+          8.000244140625
+        ],
+        [
+          13.0,
+          -14.0,
+          15.0,
+          -16.0
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..6323331
--- /dev/null
@@ -0,0 +1,14 @@
+{
+  "weights": [
+    17503969,
+    32507370,
+    45510319,
+    56887898
+  ],
+  "scale": [
+    5.7129901172951205e-08,
+    6.152450895548591e-08,
+    6.591911673802062e-08,
+    7.031372452055533e-08
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..7105a68
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014399811334442347,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..d465a7c
--- /dev/null
@@ -0,0 +1,53 @@
+{
+  "weights": [
+    [
+      [
+        [
+          2521,
+          4681,
+          6553,
+          8192
+        ],
+        [
+          -22685,
+          23405,
+          -24029,
+          24575
+        ]
+      ],
+      [
+        [
+          12603,
+          14043,
+          15291,
+          16384
+        ],
+        [
+          32767,
+          -32767,
+          32767,
+          -32767
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.0003967406231879635,
+    0.0004272591326639607,
+    0.0004577776421399579,
+    0.0004882961516159551
+  ],
+  "zero_point": 0.0,
+  "min": [
+    -13.0,
+    -14.0,
+    -15.0,
+    -16.0
+  ],
+  "max": [
+    13.0,
+    14.0,
+    15.0,
+    16.0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/DepthwiseConv2D_002_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..2d84cd7
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0031168656423687935,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/fake_quantization/weight.json
new file mode 100644 (file)
index 0000000..e1da53a
--- /dev/null
@@ -0,0 +1,76 @@
+{
+  "weights": [
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608497619629,
+      6.023530006408691,
+      -7.027451515197754,
+      7.9686279296875,
+      4.01568603515625,
+      -2.007843494415283,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608497619629,
+      6.023530006408691,
+      -7.027451515197754,
+      7.9686279296875,
+      4.01568603515625,
+      -2.007843494415283,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608497619629,
+      6.023530006408691,
+      -7.027451515197754,
+      7.9686279296875,
+      4.01568603515625,
+      -2.007843494415283,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ],
+    [
+      1.0039215087890625,
+      2.007843017578125,
+      -3.0117650032043457,
+      -4.015686511993408,
+      -5.019608497619629,
+      6.023530006408691,
+      -7.027451515197754,
+      7.9686279296875,
+      4.01568603515625,
+      -2.007843494415283,
+      3.0117645263671875,
+      -1.0039215087890625,
+      -7.9686279296875,
+      -6.023530006408691,
+      7.027451515197754,
+      5.019608497619629
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/bias.json
new file mode 100644 (file)
index 0000000..ecb49bb
--- /dev/null
@@ -0,0 +1,9 @@
+{
+  "weights": [
+    415,
+    -829,
+    -1244,
+    1658
+  ],
+  "scale": 0.00241205753304663
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/in_Quantize.json
new file mode 100644 (file)
index 0000000..654824b
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03844216465950012,
+  "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/out.json
new file mode 100644 (file)
index 0000000..3baa421
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.741962730884552,
+  "zero_point": 156.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/channel/int16/quantization/weight.json
new file mode 100644 (file)
index 0000000..9402240
--- /dev/null
@@ -0,0 +1,80 @@
+{
+  "weights": [
+    [
+      143,
+      159,
+      79,
+      63,
+      47,
+      223,
+      15,
+      254,
+      191,
+      95,
+      175,
+      111,
+      0,
+      31,
+      239,
+      207
+    ],
+    [
+      143,
+      159,
+      79,
+      63,
+      47,
+      223,
+      15,
+      254,
+      191,
+      95,
+      175,
+      111,
+      0,
+      31,
+      239,
+      207
+    ],
+    [
+      143,
+      159,
+      79,
+      63,
+      47,
+      223,
+      15,
+      254,
+      191,
+      95,
+      175,
+      111,
+      0,
+      31,
+      239,
+      207
+    ],
+    [
+      143,
+      159,
+      79,
+      63,
+      47,
+      223,
+      15,
+      254,
+      191,
+      95,
+      175,
+      111,
+      0,
+      31,
+      239,
+      207
+    ]
+  ],
+  "scale": 0.062745101749897,
+  "zero_point": 127.0,
+  "min": -7.9686279296875,
+  "max": 8.031373023986816
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/fake_quantization/weight.json
new file mode 100644 (file)
index 0000000..559e537
--- /dev/null
@@ -0,0 +1,76 @@
+{
+  "weights": [
+    [
+      1.000030517578125,
+      2.00006103515625,
+      -3.000091552734375,
+      -4.0001220703125,
+      -4.999908447265625,
+      5.99993896484375,
+      -6.999969482421875,
+      8.0,
+      4.0001220703125,
+      -2.00006103515625,
+      3.000091552734375,
+      -1.000030517578125,
+      -8.0,
+      -5.99993896484375,
+      6.999969482421875,
+      4.999908447265625
+    ],
+    [
+      1.000030517578125,
+      2.00006103515625,
+      -3.000091552734375,
+      -4.0001220703125,
+      -4.999908447265625,
+      5.99993896484375,
+      -6.999969482421875,
+      8.0,
+      4.0001220703125,
+      -2.00006103515625,
+      3.000091552734375,
+      -1.000030517578125,
+      -8.0,
+      -5.99993896484375,
+      6.999969482421875,
+      4.999908447265625
+    ],
+    [
+      1.000030517578125,
+      2.00006103515625,
+      -3.000091552734375,
+      -4.0001220703125,
+      -4.999908447265625,
+      5.99993896484375,
+      -6.999969482421875,
+      8.0,
+      4.0001220703125,
+      -2.00006103515625,
+      3.000091552734375,
+      -1.000030517578125,
+      -8.0,
+      -5.99993896484375,
+      6.999969482421875,
+      4.999908447265625
+    ],
+    [
+      1.000030517578125,
+      2.00006103515625,
+      -3.000091552734375,
+      -4.0001220703125,
+      -4.999908447265625,
+      5.99993896484375,
+      -6.999969482421875,
+      8.0,
+      4.0001220703125,
+      -2.00006103515625,
+      3.000091552734375,
+      -1.000030517578125,
+      -8.0,
+      -5.99993896484375,
+      6.999969482421875,
+      4.999908447265625
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/bias.json
new file mode 100644 (file)
index 0000000..0186c03
--- /dev/null
@@ -0,0 +1,14 @@
+{
+  "weights": [
+    27619368,
+    -55238737,
+    -82858105,
+    110477474
+  ],
+  "scale": [
+    3.620647604581258e-08,
+    3.620647604581258e-08,
+    3.620647604581258e-08,
+    3.620647604581258e-08
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/in_Quantize.json
new file mode 100644 (file)
index 0000000..1fd68ca
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014829720021225512,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/out.json
new file mode 100644 (file)
index 0000000..b295021
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.003870659740641713,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json b/compiler/pota-quantization-value-test/expected_outputs/FullyConnected_003_config/layer/uint8/quantization/weight.json
new file mode 100644 (file)
index 0000000..69254d1
--- /dev/null
@@ -0,0 +1,95 @@
+{
+  "weights": [
+    [
+      4096,
+      8192,
+      -12288,
+      -16384,
+      -20479,
+      24575,
+      -28671,
+      32767,
+      16384,
+      -8192,
+      12288,
+      -4096,
+      -32767,
+      -24575,
+      28671,
+      20479
+    ],
+    [
+      4096,
+      8192,
+      -12288,
+      -16384,
+      -20479,
+      24575,
+      -28671,
+      32767,
+      16384,
+      -8192,
+      12288,
+      -4096,
+      -32767,
+      -24575,
+      28671,
+      20479
+    ],
+    [
+      4096,
+      8192,
+      -12288,
+      -16384,
+      -20479,
+      24575,
+      -28671,
+      32767,
+      16384,
+      -8192,
+      12288,
+      -4096,
+      -32767,
+      -24575,
+      28671,
+      20479
+    ],
+    [
+      4096,
+      8192,
+      -12288,
+      -16384,
+      -20479,
+      24575,
+      -28671,
+      32767,
+      16384,
+      -8192,
+      12288,
+      -4096,
+      -32767,
+      -24575,
+      28671,
+      20479
+    ]
+  ],
+  "scale": [
+    0.00024414807580797754,
+    0.00024414807580797754,
+    0.00024414807580797754,
+    0.00024414807580797754
+  ],
+  "zero_point": 0.0,
+  "min": [
+    -8.0,
+    -8.0,
+    -8.0,
+    -8.0
+  ],
+  "max": [
+    8.0,
+    8.0,
+    8.0,
+    8.0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..9bf6c9b
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03876218944787979,
+  "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..87de111
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.029836513102054596,
+  "zero_point": 88.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..5d90528
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00015059474390000105,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/MaxPool2D_000_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..25491f0
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014986195310484618,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..ede36c6
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.039086975157260895,
+  "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..bd2fc7f
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.028692100197076797,
+  "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..18c3b04
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00015251495642587543,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..145ee8f
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00013844699424225837,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json b/compiler/pota-quantization-value-test/expected_outputs/Mean_000_config/layer/uint8/quantization/reduction_indices.json
new file mode 100644 (file)
index 0000000..394cfb3
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "weights": [
+    -1
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm1_Quantize.json
new file mode 100644 (file)
index 0000000..bbff895
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03780897706747055,
+  "zero_point": 131.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ifm2.json
new file mode 100644 (file)
index 0000000..ec6082d
--- /dev/null
@@ -0,0 +1,32 @@
+{
+  "weights": [
+    [
+      [
+        [
+          136,
+          153,
+          68
+        ],
+        [
+          51,
+          34,
+          221
+        ]
+      ],
+      [
+        [
+          0,
+          255,
+          187
+        ],
+        [
+          85,
+          170,
+          102
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.05882352963089943,
+  "zero_point": 119.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..cec0bdf
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.232084259390831,
+  "zero_point": 111.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm1_Quantize.json
new file mode 100644 (file)
index 0000000..f329b43
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0001513722527306527,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ifm2.json
new file mode 100644 (file)
index 0000000..ab968c9
--- /dev/null
@@ -0,0 +1,32 @@
+{
+  "weights": [
+    [
+      [
+        [
+          4096,
+          8192,
+          -12288
+        ],
+        [
+          -16384,
+          -20479,
+          24575
+        ]
+      ],
+      [
+        [
+          -28671,
+          32767,
+          16384
+        ],
+        [
+          -8192,
+          12288,
+          -4096
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.0002441480755805969,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/Mul_001_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..4b5118c
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.000991688808426261,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/alpha.json
new file mode 100644 (file)
index 0000000..7c00160
--- /dev/null
@@ -0,0 +1,13 @@
+{
+  "weights": [
+    [
+      [
+        51,
+        153,
+        255
+      ]
+    ]
+  ],
+  "scale": 0.0019607844296842813,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..05ce9dd
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03849203139543533,
+  "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..8f88309
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.02848827838897705,
+  "zero_point": 82.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/alpha.json
new file mode 100644 (file)
index 0000000..6f99899
--- /dev/null
@@ -0,0 +1,21 @@
+{
+  "weights": [
+    [
+      [
+        1,
+        1,
+        1
+      ]
+    ]
+  ],
+  "scale": [
+    0.10000000149011612,
+    0.30000001192092896,
+    0.5
+  ],
+  "zero_point": [
+    0,
+    0,
+    0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..7d1f4c7
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00015214986342471093,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/PRelu_001_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..533c1e3
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00015159364556893706,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..3b97773
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03907399624586105,
+  "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..698a8a7
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.01955186203122139,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..5a52a1b
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0001474507007515058,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/ReLU_000_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..ff9e41e
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0001422425702912733,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..aaba613
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.038689617067575455,
+  "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm1.json
new file mode 100644 (file)
index 0000000..aaba613
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.038689617067575455,
+  "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/ofm2.json
new file mode 100644 (file)
index 0000000..aaba613
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.038689617067575455,
+  "zero_point": 128.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/int16/quantization/split_dim.json
new file mode 100644 (file)
index 0000000..ac7cde1
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "weights": [
+     0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..2fb0c68
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014983004075475037,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm1.json
new file mode 100644 (file)
index 0000000..2fb0c68
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014983004075475037,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/ofm2.json
new file mode 100644 (file)
index 0000000..2fb0c68
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00014983004075475037,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json b/compiler/pota-quantization-value-test/expected_outputs/Split_000_config/channel/uint8/quantization/split_dim.json
new file mode 100644 (file)
index 0000000..ac7cde1
--- /dev/null
@@ -0,0 +1,5 @@
+{
+  "weights": [
+     0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..76a0440
--- /dev/null
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          0.960784912109375,
+          2.0588245391845703
+        ],
+        [
+          -3.0196075439453125,
+          -3.980391502380371
+        ],
+        [
+          4.9411773681640625,
+          -6.039215087890625
+        ]
+      ],
+      [
+        [
+          7.0,
+          7.960784912109375
+        ],
+        [
+          -9.058823585510254,
+          -10.019607543945312
+        ],
+        [
+          10.980392456054688,
+          -11.941176414489746
+        ]
+      ],
+      [
+        [
+          13.039216995239258,
+          14.000001907348633
+        ],
+        [
+          -14.960784912109375,
+          -16.05882453918457
+        ],
+        [
+          17.019607543945312,
+          -17.980392456054688
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..dc5ca8d
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.03869570419192314,
+  "zero_point": 126.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ker.json
new file mode 100644 (file)
index 0000000..bc150bb
--- /dev/null
@@ -0,0 +1,52 @@
+{
+  "weights": [
+    [
+      [
+        [
+          138,
+          146
+        ],
+        [
+          109,
+          102
+        ],
+        [
+          167,
+          87
+        ]
+      ],
+      [
+        [
+          182,
+          189
+        ],
+        [
+          65,
+          58
+        ],
+        [
+          211,
+          44
+        ]
+      ],
+      [
+        [
+          226,
+          233
+        ],
+        [
+          22,
+          14
+        ],
+        [
+          255,
+          0
+        ]
+      ]
+    ]
+  ],
+  "scale": 0.13725490868091583,
+  "zero_point": 131.0,
+  "min": -17.980392456054688,
+  "max": 17.019609451293945
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/channel/int16/quantization/ofm.json
new file mode 100644 (file)
index 0000000..bfd8621
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 1.6333034038543701,
+  "zero_point": 127.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/fake_quantization/ker.json
new file mode 100644 (file)
index 0000000..6df24eb
--- /dev/null
@@ -0,0 +1,48 @@
+{
+  "weights": [
+    [
+      [
+        [
+          0.999786376953125,
+          2.0001220703125
+        ],
+        [
+          -2.999908447265625,
+          -4.000244140625
+        ],
+        [
+          5.000030517578125,
+          -5.99981689453125
+        ]
+      ],
+      [
+        [
+          7.000152587890625,
+          7.99993896484375
+        ],
+        [
+          -9.000274658203125,
+          -10.00006103515625
+        ],
+        [
+          10.999847412109375,
+          -12.00018310546875
+        ]
+      ],
+      [
+        [
+          12.999969482421875,
+          13.999755859375
+        ],
+        [
+          -15.000091552734375,
+          -15.9998779296875
+        ],
+        [
+          17.000213623046875,
+          -18.0
+        ]
+      ]
+    ]
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ifm_Quantize.json
new file mode 100644 (file)
index 0000000..82f7fa2
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.00015178922330960631,
+  "zero_point": 0.0
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ker.json
new file mode 100644 (file)
index 0000000..8d0ceb1
--- /dev/null
@@ -0,0 +1,58 @@
+{
+  "weights": [
+    [
+      [
+        [
+          1820,
+          3641
+        ],
+        [
+          -5461,
+          -7282
+        ],
+        [
+          9102,
+          -10922
+        ]
+      ],
+      [
+        [
+          12743,
+          14563
+        ],
+        [
+          -16384,
+          -18204
+        ],
+        [
+          20024,
+          -21845
+        ]
+      ],
+      [
+        [
+          23665,
+          25485
+        ],
+        [
+          -27306,
+          -29126
+        ],
+        [
+          30947,
+          -32767
+        ]
+      ]
+    ]
+  ],
+  "scale": [
+    0.0005493331705679495
+  ],
+  "zero_point": 0.0,
+  "min": [
+    -18.0
+  ],
+  "max": [
+    18.0
+  ]
+}
diff --git a/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json b/compiler/pota-quantization-value-test/expected_outputs/TransposeConv_001_config/layer/uint8/quantization/ofm.json
new file mode 100644 (file)
index 0000000..f370bf4
--- /dev/null
@@ -0,0 +1,4 @@
+{
+  "scale": 0.0122029148042202,
+  "zero_point": 0.0
+}
index 4eb7204e1c60e7152caa5a45badab60fe87296e5..5ce8dfb5d691e568c84f5006c941d4f07a45ed38 100644 (file)
@@ -2,4 +2,4 @@ require("record-minmax")
 require("circle-quantizer")
 require("circle-tensordump")
 require("common-artifacts")
-require("mio-circle")
+require("mio-circle04")
index 4beec8c0e57eeb2a807073b8fa6b7d3e71093253..e169de57c8fe59b8b633db464488e040be560703 100644 (file)
@@ -31,3 +31,32 @@ addTest(Split_000 channel int16)
 addTest(TransposeConv_001 channel uint8)
 addTest(TransposeConv_001 channel int16)
 addTest(TransposeConv_001 layer uint8)
+
+addQConfTest(Add_002 layer uint8)
+addQConfTest(Add_002 channel int16)
+addQConfTest(AveragePool2D_000 layer uint8)
+addQConfTest(AveragePool2D_000 channel int16)
+addQConfTest(Concatenation_001 layer uint8)
+addQConfTest(Concatenation_001 channel int16)
+addQConfTest(Conv2D_004 channel int16)
+addQConfTest(Conv2D_004 layer uint8)
+addQConfTest(DepthwiseConv2D_002 channel int16)
+addQConfTest(DepthwiseConv2D_002 layer uint8)
+addQConfTest(FullyConnected_003 channel int16)
+addQConfTest(FullyConnected_003 layer uint8)
+#addQConfTest(InstanceNorm_001 layer uint8)    Enable this when int16 CWQ data is ready.
+#addQConfTest(InstanceNorm_001 channel int16)  Enable this when int16 CWQ data is ready.
+addQConfTest(Mean_000 layer uint8)
+addQConfTest(Mean_000 channel int16)
+addQConfTest(MaxPool2D_000 layer uint8)
+addQConfTest(MaxPool2D_000 channel int16)
+addQConfTest(Mul_001 layer uint8)
+addQConfTest(Mul_001 channel int16)
+addQConfTest(PRelu_001 layer uint8)
+addQConfTest(PRelu_001 channel int16)
+addQConfTest(ReLU_000 layer uint8)
+addQConfTest(ReLU_000 channel int16)
+addQConfTest(Split_000 channel uint8)
+addQConfTest(Split_000 channel int16)
+addQConfTest(TransposeConv_001 channel int16)
+addQConfTest(TransposeConv_001 layer uint8)
diff --git a/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh b/compiler/pota-quantization-value-test/test_fake_wquant_with_config.sh
new file mode 100755 (executable)
index 0000000..070b273
--- /dev/null
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+# This script tests fake quantization with config file
+#
+# HOW TO USE
+#
+# ./test_fake_wquant_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_QUANTIZER_PATH}
+# work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do  
+  MODELNAME=$1; shift
+  GRANULARITY=$1; shift
+  DTYPE=$1; shift
+  TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.fake_quantized.mixed.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}_fake_quantization_with_config.log" <(
+    exec 2>&1
+    set -ex
+
+    # Run circle-quantizer with --quantize_dequantize_weights
+    "${CIRCLE_QUANTIZER_PATH}" \
+      --quantize_dequantize_weights float32 "${DTYPE}" "${GRANULARITY}" \
+      --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \
+      "${WORKDIR}/${MODELNAME}.circle" \
+      "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" 
+
+    # Dump weights values (circle-tensordump)
+    "${CIRCLE_TENSORDUMP_PATH}" \
+      "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \
+      --tensors_to_hdf5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5"
+
+    # Compare result
+    "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+      --input_h5 "${TEST_RESULT_FILE}.fake_quantized.mixed.circle.h5" \
+      --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/fake_quantization" \
+      --mode fake_quantization
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..b6e2efa
--- /dev/null
@@ -0,0 +1 @@
+-0.8596993, 4.8127713,-3.4127183, 4.2323627,-2.2201376,-1.5362649,-4.9921966, 0.9565166, 3.2879171,-1.3590081,-3.771852 ,-4.1042285
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..bcf2807
--- /dev/null
@@ -0,0 +1 @@
+ 0.14624089, 4.7304125 , 4.833998  , 4.2321773 ,-2.0582533 ,-2.3694758 , 1.4213978 , 2.2444596 , 3.3630798 ,-0.70257574, 3.586656  ,-2.513805  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..c3e32d2
--- /dev/null
@@ -0,0 +1 @@
+ 2.175218  , 0.02776978,-2.6291077 , 3.5350094 ,-1.2364857 ,-3.3151364 ,-0.92507887, 2.8038094 ,-1.8781518 , 3.6221995 , 2.4015775 ,-2.9217577 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..a92abd4
--- /dev/null
@@ -0,0 +1 @@
+-1.0345451,-1.5055941,-4.144375 ,-4.727011 , 1.5841546, 4.5780725,-4.24402  ,-2.3966947,-3.0370803,-1.0234503,-0.2750057, 3.2965126
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..2f2937f
--- /dev/null
@@ -0,0 +1 @@
+-2.4460397 , 2.6090143 , 4.1773095 , 0.11204174,-3.3053472 , 2.5160108 ,-3.0612547 , 1.0667087 , 2.8952355 , 3.842513  , 0.6790793 ,-0.33375   
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..a219546
--- /dev/null
@@ -0,0 +1 @@
+-0.48516417,-4.5555663 ,-2.9907737 , 2.422857  , 1.010034  , 3.6436582 , 0.29334423,-4.0628953 , 1.0116768 , 3.0871766 , 3.3341465 , 4.3921704 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..70d3139
--- /dev/null
@@ -0,0 +1 @@
+-0.7787985 , 4.101575  ,-0.4839729 , 0.35971674,-4.3452406 ,-4.811665  ,-3.8693128 , 4.239326  , 0.44103175, 3.5549765 , 2.5334291 , 1.4546562 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..3c38f8d
--- /dev/null
@@ -0,0 +1 @@
+ 3.5943313,-1.4843192, 1.956341 ,-1.3242344, 1.5901331,-3.641623 , 4.6022506,-0.307265 ,-0.6359913,-4.0109854,-1.2064985, 1.1137954
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..e89a022
--- /dev/null
@@ -0,0 +1 @@
+ 3.1036437 ,-0.39538398,-0.07278133, 4.547673  , 3.9132211 , 2.6468625 ,-4.2830634 ,-2.0573084 , 2.1074655 ,-4.0634165 ,-4.55598   ,-0.7942089 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Add_002_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..2b00832
--- /dev/null
@@ -0,0 +1 @@
+-2.7745228, 1.4813256, 4.4699864, 3.7466738,-2.9847758,-4.453416 , 3.2515864,-1.2459193,-4.44965  ,-1.8452735, 4.423347 , 4.2998137
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..e42cbf8
--- /dev/null
@@ -0,0 +1 @@
+-4.1358833e+00, 1.7854472e+00, 4.1751757e+00, 5.5915713e-01,-2.6459083e-01,-1.7176826e+00,-1.8155930e+00, 2.8710868e+00,-2.7043006e+00, 1.0959731e+00,-2.0176995e+00,-6.5950048e-01,-3.6413522e+00,-4.1966043e+00,-2.6820884e+00,-3.6055098e+00, 3.6852844e+00, 8.9128174e-02, 1.3107824e+00,-3.6425626e+00,-3.2318896e-01, 3.6238370e+00,-4.9837337e+00,-4.0550299e+00,-1.4882606e+00, 1.5547658e+00,-1.1696080e+00, 2.1651111e+00, 4.9318314e+00,-3.5928023e+00,-1.2348548e+00,-1.7002642e+00, 1.7365140e+00,-8.8151926e-01,-4.1655774e+00,-1.0166957e+00,-3.7440193e+00, 2.8588972e+00, 4.1286149e+00,-4.9504828e+00, 4.8477168e+00,-2.2587967e+00, 2.8542519e+00,-7.9565448e-01, 6.8252671e-01, 2.5875571e-01,-6.3935977e-01,-4.8547015e+00, 4.1373856e-03,-1.3893708e+00, 8.8775367e-01, 2.1222150e-01, 3.1871333e+00, 1.3869151e+00,-3.8274391e+00, 3.2623324e+00, 7.2669631e-01, 1.0303619e+00, 8.1438148e-01, 8.1272924e-01,-2.7527118e+00, 1.8215455e+00,-1.6416427e-01, 4.9103169e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..7caf8ce
--- /dev/null
@@ -0,0 +1 @@
+-4.250757  , 1.4186406 , 0.63726735,-0.35924944, 1.9436699 , 3.2695885 , 3.6638293 , 4.5166173 , 1.3807241 ,-1.9112543 ,-1.9026492 ,-0.4800549 , 2.818216  ,-4.6390033 ,-3.8570547 , 3.6634028 ,-1.2112037 ,-1.3335027 , 1.3524677 , 2.7240725 ,-3.8335826 , 1.1397903 ,-3.1570992 ,-4.802078  , 3.8334577 , 0.23457901, 0.7132307 , 2.9887354 , 2.9702394 ,-1.4113717 ,-0.66712093, 0.77366674, 1.9308351 ,-0.45465755, 4.925366  , 2.4214447 , 2.8401468 , 0.49789894, 0.53141665,-2.7466767 , 0.2059374 ,-4.9661317 ,-4.1334467 , 1.6928389 ,-0.42529574, 1.1033608 , 4.275776  , 1.5063075 , 2.3528252 , 0.79505247, 3.9829993 ,-4.8472476 ,-1.2752185 , 3.7365675 , 1.976164  ,-4.742636  ,-2.7199092 ,-2.9191706 ,-3.181069  ,-4.489485  , 4.0847454 , 2.2164    , 0.9725334 ,-0.72566307
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..7facffa
--- /dev/null
@@ -0,0 +1 @@
+-3.8293874 ,-0.13678598,-2.5444264 , 1.654611  ,-4.3037786 ,-3.4240584 ,-4.5642533 , 4.1250315 , 1.0469195 , 4.2802887 , 3.1617825 ,-3.1706758 ,-0.99622065, 2.7707603 , 3.7494645 ,-1.4548893 , 2.328633  , 1.7976477 ,-1.2107176 ,-2.0178459 ,-0.6488357 ,-2.9393644 , 2.8918762 , 3.6192262 ,-4.1777225 , 1.3264071 , 0.32620123, 0.7890992 ,-3.304334  , 3.4893208 , 2.5354576 ,-4.7718143 , 3.8602633 , 0.4927564 , 2.2971296 ,-0.3296792 , 2.8115997 ,-0.75152504, 0.558675  ,-2.343631  , 4.650826  ,-3.0893488 , 0.8726873 , 0.24922371, 2.7634025 , 1.0358421 ,-3.862506  ,-3.169402  ,-2.5373347 , 0.9484093 , 4.1409917 ,-4.0408096 ,-2.7231216 ,-2.548547  ,-2.6315095 , 0.8164778 ,-3.017436  , 1.1860138 ,-1.8634807 , 1.8684052 , 1.8657844 , 1.7747321 ,-3.1472425 ,-1.3989028 
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..0be8fdd
--- /dev/null
@@ -0,0 +1 @@
+-2.0492268 ,-2.2555764 ,-1.3543441 ,-3.7278662 ,-4.8601675 , 3.1095552 , 4.6319957 , 3.0211062 , 1.7870535 , 4.8839574 ,-1.3494394 , 2.635408  ,-0.24201432, 1.312397  , 0.16790341, 2.42507   ,-3.101355  , 3.1760497 ,-4.500736  ,-2.53691   , 1.064206  , 0.62096214, 2.803344  ,-4.6166744 ,-4.624786  , 3.667064  ,-1.484021  , 4.9401817 ,-3.763283  , 3.4351027 ,-2.906393  , 4.9945946 ,-3.2997096 , 3.6325612 ,-0.47211674, 0.28783202, 1.8703817 ,-4.042374  ,-3.3353784 , 4.9085765 ,-1.6753131 ,-3.4926984 ,-4.8663344 ,-4.495712  , 2.3402312 ,-1.0722051 , 0.28559962, 2.1208072 , 1.3024254 , 3.4810693 , 0.09860361, 1.695624  , 1.3901931 , 1.6858819 , 3.8231227 , 4.5972557 ,-4.6835494 , 0.5753765 ,-2.2377403 , 0.13013013,-2.1165738 ,-0.26044115,-0.653468  , 1.1010929 
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..7e2d618
--- /dev/null
@@ -0,0 +1 @@
+ 4.397323  ,-0.51448834, 2.5729322 ,-4.3229046 , 1.149113  ,-3.8652143 ,-1.7352968 ,-0.7575065 ,-0.41720778, 4.327346  ,-4.2363043 , 0.8653738 ,-1.7511971 ,-0.7874244 ,-4.0734816 , 2.5622475 ,-3.1229742 ,-1.1783633 , 0.4017013 ,-0.76175183,-1.058416  , 1.128772  ,-3.0143378 ,-2.6688366 ,-2.575279  ,-4.326955  , 4.175434  , 4.791393  ,-1.10654   ,-4.4417224 , 3.5057635 , 1.5339037 ,-4.0297494 ,-3.7187057 ,-0.6645762 , 4.215642  , 1.6742749 , 2.5468905 , 1.73195   ,-3.3100636 ,-4.4818826 ,-2.5627983 ,-1.4624406 , 1.2433167 ,-4.005364  ,-4.3450556 ,-1.0652863 ,-1.0240986 , 3.989825  ,-4.1690702 ,-4.595108  ,-1.1154945 , 0.65749156, 2.5127344 , 2.509761  ,-4.3936505 , 3.6513395 ,-2.3340352 ,-4.3615093 , 3.5973237 , 0.9316653 , 1.9391845 , 3.6356397 , 0.8133118 
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..2a6b09b
--- /dev/null
@@ -0,0 +1 @@
+-4.629505  , 1.0121975 ,-0.13417433,-2.329806  ,-3.4927373 ,-0.7574039 ,-2.2674313 , 3.1983519 , 2.4298382 ,-0.23268977, 2.0218065 ,-1.5087285 ,-1.3953347 ,-3.8100643 ,-1.7438283 , 3.9852605 , 2.9817178 ,-4.0460877 , 0.09402129, 4.3802586 ,-1.0991771 , 0.4134776 , 2.8136911 ,-3.6254618 ,-3.925183  , 4.691824  , 4.381538  ,-3.235543  ,-2.6764185 , 2.659456  ,-3.2127233 , 0.0206281 , 3.4056723 ,-1.693684  , 1.1005328 ,-3.1486542 , 0.77198106, 1.4526777 ,-2.3614178 , 4.8214664 ,-3.1486242 , 0.58941853,-4.1100698 , 4.1982718 , 1.7219902 ,-2.4375956 ,-1.7505955 , 1.7465224 ,-2.7494361 , 4.0679016 , 1.8936038 ,-4.523818  ,-3.4124248 ,-4.809946  ,-1.939553  , 4.9411273 , 1.6261404 ,-2.6846552 , 2.1339247 , 0.61396503,-1.6662381 , 2.4282491 , 2.662007  ,-0.40868336
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..470da6c
--- /dev/null
@@ -0,0 +1 @@
+ 0.70593804, 3.253847  , 1.1094694 , 0.5295975 , 0.5944647 ,-2.4391694 , 4.7912955 , 4.4374456 ,-2.942428  ,-3.5038033 ,-3.180417  , 2.1914082 ,-4.5295396 ,-3.0037553 ,-2.265191  , 0.20113531, 2.3805366 ,-0.9111223 ,-4.3170924 , 4.08436   , 1.1006241 ,-1.286977  , 4.811279  , 0.9131829 , 3.2051497 ,-2.8660698 ,-3.188871  , 1.4163305 , 4.061829  , 2.7783196 ,-3.4975152 , 3.4888391 , 2.5789826 ,-1.5264264 ,-0.13952135,-1.280177  , 2.4716458 , 2.6200528 ,-2.515086  , 3.441416  , 2.4515297 ,-0.9845471 , 0.9481396 , 1.1518412 , 1.6088997 , 1.445077  , 2.2620194 ,-2.0843177 ,-0.7263964 , 1.8159748 ,-3.3673623 , 0.2554476 ,-4.3550563 ,-1.4280493 ,-2.2702312 ,-4.7424164 ,-0.57241255,-2.813357  , 2.9161859 ,-0.9036504 , 0.00511268, 0.60724795, 4.8010454 , 1.6000834 
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..d9e048b
--- /dev/null
@@ -0,0 +1 @@
+ 7.07888961e-01, 4.75798702e+00,-1.47843570e-01,-1.95845592e+00, 4.26537895e+00,-3.03711486e+00,-1.35137546e+00,-1.10638596e-01,-1.02415502e+00,-2.65345359e+00, 5.48920631e-01,-4.38003826e+00, 3.61377740e+00,-2.91408587e+00,-3.22874010e-01,-4.74363208e-01, 3.45294738e+00, 1.02204478e+00,-1.44102740e+00, 6.80687547e-01,-2.44050741e+00, 3.71395111e+00,-2.14443612e+00, 3.70928717e+00, 1.35871637e+00, 9.73374963e-01, 1.57826161e+00,-2.91381836e-01, 1.46376801e+00, 2.96391749e+00, 1.08418810e+00,-3.50718546e+00, 4.68637037e+00, 1.04839933e+00, 2.24482760e-01, 2.38816309e+00, 3.18772525e-01,-3.90284014e+00,-3.32757282e+00,-1.61143410e+00,-1.26013708e+00, 2.24948835e+00, 7.63151050e-01, 4.18296242e+00,-8.69123042e-01, 3.19850564e-01, 3.52391124e-01, 3.30018830e+00,-4.64861393e+00,-4.64479780e+00,-2.68103647e+00,-1.13277221e+00, 2.02201343e+00,-4.05572534e-01, 3.06759548e+00,-3.55881310e+00,-1.14900565e+00,-3.00835490e+00, 1.31509733e+00, 2.50206441e-01, 2.47731134e-01, 4.98673916e+00,-1.74064383e-01,-4.43180744e-03
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..cdbf98e
--- /dev/null
@@ -0,0 +1 @@
+ 3.5591762 , 4.8821726 , 0.44271094, 4.786732  ,-2.4497197 , 2.4973536 , 2.034311  , 4.8329844 ,-3.9451184 , 4.9937835 , 2.0246332 ,-2.8319602 , 3.9617133 , 4.10946   ,-4.3191586 ,-2.8492777 ,-2.648121  ,-4.199404  ,-0.05163948,-4.7944984 , 2.8989205 , 1.4747709 ,-3.1194637 ,-2.877846  ,-0.39301065, 2.616311  , 2.6305614 , 1.7303206 , 3.6059175 ,-2.745988  , 2.5924454 , 3.0149276 , 4.0359216 ,-0.6135884 ,-2.5023808 ,-2.3395267 ,-3.0633461 ,-2.3836162 ,-4.4779797 ,-1.30866   , 1.9110863 , 0.654628  ,-4.559368  , 0.34231895,-0.8196542 , 4.7275734 , 3.2823656 ,-4.9644713 , 2.9191613 ,-3.4621727 ,-4.276584  ,-1.7153062 , 1.8820064 , 1.2659297 , 3.4141889 ,-4.905296  , 4.619848  ,-3.9501083 ,-1.5550466 , 3.6841137 , 1.7121594 , 1.9466268 , 1.5684807 , 4.5554323 
diff --git a/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/AveragePool2D_000_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..065d77d
--- /dev/null
@@ -0,0 +1 @@
+-2.2269225 ,-1.2782103 ,-3.381931  ,-1.5229299 , 2.0681949 , 1.7630705 ,-0.81455594,-2.6558595 ,-3.4870632 ,-4.647749  , 2.4453654 ,-2.242679  ,-1.0272806 , 0.5656208 , 0.69442594,-4.4343104 ,-3.9649677 ,-3.8908577 ,-1.642287  , 3.0714357 , 1.0880747 ,-2.1665683 ,-4.0994506 , 2.004911  , 3.5922902 , 3.775     , 1.1580672 ,-1.4154137 ,-4.4964633 ,-1.696588  , 4.0220857 ,-1.2785947 ,-4.2075186 ,-4.515838  , 0.99715126, 3.0928102 ,-2.295537  ,-4.772882  ,-1.2936146 ,-2.6903791 , 0.10453273,-1.8041211 , 3.787591  , 0.9493053 ,-4.41586   , 3.4252715 ,-0.25001565, 4.655357  ,-1.8767506 , 0.00600041, 4.660605  , 2.550518  ,-3.830558  , 1.7777463 ,-0.7170577 ,-0.26554853,-3.5770113 ,-1.1354474 , 4.663121  , 3.100427  , 0.03313563,-1.7419808 ,-1.4426676 ,-3.912533  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..9def1c2
--- /dev/null
@@ -0,0 +1 @@
+0.24671102,3.271825  ,3.979895  ,1.3334678 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..eaec240
--- /dev/null
@@ -0,0 +1 @@
+ 1.9181111, 2.2396102,-2.8641696,-1.9045062
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..3e05181
--- /dev/null
@@ -0,0 +1 @@
+4.751434  ,2.8798263 ,0.15149078,2.9485583 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..19d95b2
--- /dev/null
@@ -0,0 +1 @@
+-1.5743442 , 0.6716824 , 0.75737774,-0.27396253
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..d302e07
--- /dev/null
@@ -0,0 +1 @@
+-1.0539489 , 1.9595883 , 0.19975437, 2.526178  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..af1c2df
--- /dev/null
@@ -0,0 +1 @@
+-4.0575085 , 2.5941508 ,-2.550309  ,-0.03760919
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..0ede613
--- /dev/null
@@ -0,0 +1 @@
+ 0.4857123,-4.032874 ,-3.687589 ,-1.235227 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..b0b0392
--- /dev/null
@@ -0,0 +1 @@
+ 0.21878362, 3.9175916 ,-4.6141233 , 3.709655  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..d8a8cad
--- /dev/null
@@ -0,0 +1 @@
+-1.9645791,-1.4466153, 1.2543651,-1.0288917
diff --git a/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Concatenation_001_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..ca2a1c3
--- /dev/null
@@ -0,0 +1 @@
+-2.1611342, 2.4875243, 3.096089 ,-1.1327268
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..0614b5e
--- /dev/null
@@ -0,0 +1 @@
+0.01090685,0.0581577 ,0.637094  ,0.64067715,0.26264507,0.13692169,0.9649414 ,0.5117181 ,0.18012471,0.07855253,0.6358017 ,0.62257963,0.41469443,0.93169045,0.20763828,0.7634293 ,0.75929826,0.72708374,0.23463063,0.58222896,0.6351517 ,0.68781173,0.5558012 ,0.7652179 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..b1c3938
--- /dev/null
@@ -0,0 +1 @@
+0.57017624,0.08235867,0.03672464,0.40372616,0.7353964 ,0.59611887,0.7675548 ,0.21004233,0.09803218,0.20009473,0.8821493 ,0.17015271,0.14840214,0.99910176,0.37003204,0.22893582,0.43173164,0.3105084 ,0.41997132,0.43714985,0.08115962,0.71896386,0.7810953 ,0.00524598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..7e562de
--- /dev/null
@@ -0,0 +1 @@
+0.65292275,0.79842275,0.97853714,0.6711518 ,0.607567  ,0.40971732,0.74838483,0.95853555,0.32158023,0.911524  ,0.66938365,0.8573132 ,0.3047727 ,0.5561248 ,0.914098  ,0.07650814,0.37868017,0.29269257,0.19652605,0.63025194,0.61496884,0.32011527,0.8204132 ,0.21866946
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..2958a7f
--- /dev/null
@@ -0,0 +1 @@
+0.4548901 ,0.56957537,0.0252368 ,0.4884317 ,0.7516498 ,0.02631272,0.22107519,0.95249426,0.34902394,0.11520014,0.808911  ,0.4148615 ,0.63615656,0.84020686,0.3633697 ,0.23993976,0.54176176,0.86938345,0.81628686,0.6380988 ,0.91891205,0.0406627 ,0.90289026,0.9429013 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..fc96930
--- /dev/null
@@ -0,0 +1 @@
+0.9309136 ,0.02123719,0.64467335,0.6910113 ,0.47402772,0.54622203,0.31527275,0.81530565,0.98981965,0.36102158,0.03114039,0.1902339 ,0.45183742,0.60178596,0.4683102 ,0.59810966,0.40558222,0.5420302 ,0.72699505,0.9575108 ,0.46746576,0.08518691,0.40302262,0.69213694
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..f82ad67
--- /dev/null
@@ -0,0 +1 @@
+ 1.4040831 , 4.8621206 , 0.22880335,-0.3116556 , 0.260938  ,-0.61554366, 3.779648  ,-4.650609  , 3.886638  ,-0.25574106,-0.45002133, 4.9870906 ,-2.3277295 ,-4.9648423 ,-3.7695415 , 3.2857463 ,-4.5514555 ,-3.7705963 , 3.8458307 ,-4.797776  ,-3.4295716 ,-4.6026535 ,-1.4011091 , 2.8851774 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..7223372
--- /dev/null
@@ -0,0 +1 @@
+-4.171929  ,-2.2911541 , 2.8965824 , 0.27504483,-1.6088463 ,-0.6509234 ,-3.262618  , 0.9633116 , 2.4504175 , 0.97706884, 0.4212074 , 1.4083375 ,-2.9757218 ,-3.1010823 ,-1.7146534 , 4.105306  , 0.07195274, 3.0232217 ,-2.7568955 ,-4.8887763 ,-3.4171093 ,-0.91494775, 2.5260248 , 4.74184   
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..1283a8a
--- /dev/null
@@ -0,0 +1 @@
+ 0.14139967, 1.9541235 ,-4.945228  ,-0.48999134, 3.7479703 , 0.29318067, 0.21036309, 4.357736  ,-4.3354783 ,-1.9236348 , 0.49615476,-1.8418436 ,-2.425741  , 4.817022  , 1.5093465 , 2.417444  ,-4.69463   , 0.3433745 ,-4.5979595 ,-3.9027495 ,-0.29977685, 4.9239326 ,-0.39175773, 1.277211  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..c931e17
--- /dev/null
@@ -0,0 +1 @@
+-3.692852  ,-1.0075341 ,-2.4409268 , 0.92995465,-3.1325107 , 4.028981  , 0.8446181 ,-2.2990613 , 4.0820794 , 3.1633005 , 4.1527267 ,-3.9514909 , 2.6104712 , 4.660645  ,-1.7398617 , 0.15663597,-3.6861904 ,-2.9019265 , 3.8828175 ,-2.712909  , 4.3699546 ,-3.5953352 ,-3.0655813 , 0.59767616
diff --git a/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Conv2D_004_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..d33c2db
--- /dev/null
@@ -0,0 +1 @@
+-2.8695228 , 2.865197  , 0.6635586 , 0.22709726, 2.85572   ,-4.2051144 , 1.5833759 ,-4.4277377 , 4.0004573 , 2.4766827 , 3.0412688 ,-4.8891425 ,-4.489896  , 3.0812325 , 2.1947708 , 1.6387184 , 0.31932488,-0.41092923,-0.0730476 , 0.7265327 , 4.1333    , 3.157228  , 4.7395325 , 3.4576747 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..f4fb503
--- /dev/null
@@ -0,0 +1 @@
+0.4383064 ,0.8700848 ,0.86010957,0.08396256,0.7963264 ,0.4156023 ,0.28146362,0.82196397,0.9921972 ,0.09969576,0.23987265,0.6734369 ,0.5469574 ,0.20805728,0.32639247,0.76773816
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..af4b015
--- /dev/null
@@ -0,0 +1 @@
+0.4565062 ,0.92036587,0.47286046,0.18118097,0.5347498 ,0.91550153,0.300375  ,0.00581101,0.38686675,0.91085213,0.07278002,0.35556316,0.13014294,0.7274307 ,0.13867259,0.27517235
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..5771603
--- /dev/null
@@ -0,0 +1 @@
+0.6900174 ,0.28745306,0.30255774,0.5095008 ,0.6689176 ,0.4914624 ,0.92629427,0.504829  ,0.33514255,0.49005315,0.08569656,0.60965323,0.82193315,0.12380831,0.06971261,0.8822662 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..1e03d83
--- /dev/null
@@ -0,0 +1 @@
+0.4240734 ,0.5430392 ,0.7536325 ,0.46065134,0.00315792,0.02719985,0.7080977 ,0.24389206,0.8114604 ,0.13292362,0.346597  ,0.70247084,0.55753845,0.01969242,0.82950485,0.66249627
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..89ee30a
--- /dev/null
@@ -0,0 +1 @@
+0.31586212,0.19079527,0.9161567 ,0.8614566 ,0.9018915 ,0.34651542,0.62554437,0.05542602,0.8268219 ,0.38112178,0.9396123 ,0.49426383,0.8034765 ,0.72456217,0.5404088 ,0.8512237 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..cc434b0
--- /dev/null
@@ -0,0 +1 @@
+-4.0618963 ,-0.56899416,-2.6450877 , 2.4534085 , 1.98115   , 1.906561  ,-3.9617727 ,-0.6071247 , 3.1096997 , 4.4270124 ,-2.8755112 ,-1.8822336 ,-2.3567479 , 1.9797888 ,-3.5018713 , 3.429169  
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..2c637a1
--- /dev/null
@@ -0,0 +1 @@
+-1.6089132 , 1.4328785 ,-3.2579598 ,-2.1328773 ,-2.6566415 , 2.541386  ,-4.3314023 , 0.48684084, 3.3134763 ,-2.69083   ,-0.45710313,-3.6763198 , 0.22075526,-3.159208  ,-2.1573126 , 4.1621423 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..4b57fe8
--- /dev/null
@@ -0,0 +1 @@
+-4.061572  , 3.0518744 , 2.694435  ,-4.720131  , 1.3782452 , 4.083631  , 4.1221976 ,-1.2299284 , 3.096133  , 3.8382158 ,-1.9518853 , 4.350529  , 0.09219506, 2.6483617 , 0.74373996, 2.7447948 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..49c3022
--- /dev/null
@@ -0,0 +1 @@
+ 4.68769   ,-3.2768764 , 3.1849844 , 4.497627  ,-1.2611016 ,-3.1152303 ,-0.8408633 , 0.4938034 , 4.0921655 ,-2.3150117 , 0.10100875,-3.8374226 , 4.08059   ,-0.74594986,-3.1000822 , 4.3654246 
diff --git a/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/DepthwiseConv2D_002_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..e02c8ca
--- /dev/null
@@ -0,0 +1 @@
+-3.6168842 , 4.1935644 , 0.73750836, 4.6044145 , 2.8967912 ,-1.8085694 , 4.539956  ,-0.37032878, 1.9738418 , 1.5388782 ,-2.945171  ,-3.3875864 ,-4.516983  ,-3.4998245 ,-4.676514  ,-2.2738194 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..233e5ea
--- /dev/null
@@ -0,0 +1 @@
+ 2.7731526 , 2.451602  , 3.7535272 ,-1.2774152 , 1.5482912 , 1.3402948 , 4.4792123 ,-4.4954367 , 3.354679  ,-3.3615496 ,-4.619757  ,-3.3659618 , 4.7626247 ,-1.3596478 ,-4.835548  , 0.78964525
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..6a12608
--- /dev/null
@@ -0,0 +1 @@
+ 0.5400839 ,-3.2621996 ,-3.4817135 , 3.8183312 , 0.48498327, 2.9812584 , 4.111276  , 0.11223658, 4.7201405 , 2.4256718 , 1.4895477 , 4.7596602 ,-0.32709372, 1.3507305 ,-0.30043927,-1.8077502 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..eccd2c6
--- /dev/null
@@ -0,0 +1 @@
+ 3.8758078 , 4.978636  ,-0.22925885,-2.6760504 ,-1.9160627 ,-4.609644  ,-0.9515802 , 3.558274  , 2.9096057 , 0.3340422 , 0.38608226,-0.32168412, 4.688853  ,-4.583811  ,-2.5113506 ,-4.6688786 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..0da0527
--- /dev/null
@@ -0,0 +1 @@
+-2.9868221 , 2.4237797 , 1.0833962 ,-0.9231426 ,-2.1091506 ,-2.6163697 ,-0.23101932,-1.9252896 , 4.7034135 , 3.1088963 ,-2.345823  ,-2.7866168 ,-3.186763  ,-4.431844  , 3.3113294 , 0.9501982 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..ace24f7
--- /dev/null
@@ -0,0 +1 @@
+ 3.9716747 ,-2.254871  , 1.1943274 ,-2.212602  , 3.4311683 , 1.114989  , 4.0739036 , 0.47244295,-3.5793104 ,-3.359908  ,-4.7657595 , 2.0369127 ,-2.5619278 ,-3.4452975 ,-4.5852203 ,-1.137643  
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..18b34c8
--- /dev/null
@@ -0,0 +1 @@
+ 1.5887886e+00,-4.7446389e+00,-8.6568648e-01,-2.9789083e+00, 4.4470620e+00,-4.6563668e+00,-3.8466794e+00, 1.8815753e-03,-2.7699089e+00, 5.2776605e-01, 3.6518128e+00,-3.0939088e+00,-3.6008542e+00, 7.2454107e-01, 2.2568390e+00,-4.4835806e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..d652da6
--- /dev/null
@@ -0,0 +1 @@
+ 4.770412  ,-1.7520845 , 2.4057522 ,-0.74166125,-0.10780027, 4.5796657 ,-3.513094  ,-3.0285823 , 1.2001143 , 2.806742  ,-2.0503895 , 2.8160958 ,-1.5392824 ,-3.7772799 , 2.9158401 ,-1.0586692 
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..e6d6e00
--- /dev/null
@@ -0,0 +1 @@
+ 3.937408  ,-0.11191579, 2.2054992 , 2.847275  , 3.4895647 , 4.2361116 ,-3.2401278 ,-1.5813186 ,-4.558396  ,-0.89455926, 4.204445  , 3.5968838 , 2.773891  ,-2.9562843 ,-0.62606305,-0.03814701
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..8b47205
--- /dev/null
@@ -0,0 +1 @@
+ 3.5032003 , 4.6036057 , 0.28915945, 4.671659  ,-1.978598  , 2.1773603 ,-0.54175234,-3.0131943 ,-2.7422159 ,-3.4361897 , 0.2850049 , 4.1412387 ,-4.86403   ,-0.67577606,-1.4206086 ,-2.357092  
diff --git a/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/FullyConnected_003_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..bba80be
--- /dev/null
@@ -0,0 +1 @@
+ 2.5063417 , 0.22874236, 2.2677753 ,-4.4159026 , 1.7464    , 4.6051064 ,-4.2867146 , 2.730521  , 1.6372519 , 0.70292765, 3.459053  ,-4.162376  , 0.36788836, 2.213299  , 4.110952  , 1.6797827 
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..31a2db0
--- /dev/null
@@ -0,0 +1 @@
+-4.1984134 , 3.7565446 , 1.3521377 ,-4.0263743 ,-1.929471  ,-3.7523155 , 1.3858393 , 4.1565247 ,-2.4681342 , 0.3598748 ,-2.0044599 , 3.7168603 , 3.6330557 , 3.0176272 ,-4.4643235 ,-0.1893698 , 3.8839848 ,-4.5703125 , 3.365731  , 4.5556674 , 4.954971  , 1.7591819 ,-0.9497736 ,-0.8527185 ,-1.1863561 ,-4.522639  ,-4.3187394 ,-3.702939  , 0.15341021, 0.8564923 , 1.9076811 , 4.2765    ,-3.7695112 ,-1.6033245 , 2.3159432 ,-1.6656336 , 1.4186145 , 4.334284  , 4.0654674 ,-4.518256  , 0.72815216, 2.5133176 ,-4.238172  , 1.0198449 ,-0.9638457 , 2.5847483 , 4.0381308 , 4.472872  , 0.11794223, 1.3358012 , 1.7975981 , 2.168553  ,-3.5131238 , 3.8412008 , 3.851232  ,-2.130775  , 3.556102  , 0.69062364,-4.668594  ,-4.619906  ,-2.87768   ,-1.0679495 ,-4.523185  , 4.184176  
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..2bdd62b
--- /dev/null
@@ -0,0 +1 @@
+ 2.9193265 , 4.315574  ,-3.7834768 , 3.4352486 , 4.1452866 ,-4.0322523 , 1.8039155 ,-4.080042  ,-1.1999705 , 4.9018297 ,-0.27180746, 1.709373  , 4.3322196 , 4.9179945 ,-3.977508  , 2.3486571 ,-0.11026379,-0.24730131, 2.3269305 , 2.1862001 , 0.92486495, 3.5822759 , 2.8370361 , 3.915398  ,-0.6385275 ,-0.02720119,-1.408676  ,-4.4472733 , 1.2901759 ,-4.60209   ,-2.9502335 ,-2.650517  ,-1.4038593 ,-2.967456  ,-2.0060933 ,-1.9603083 ,-0.4727794 ,-1.7877682 ,-3.9565926 , 1.4452418 , 2.5925353 ,-4.5134907 ,-4.195412  , 2.4681656 , 0.7140492 , 3.0753498 , 0.269442  ,-4.768041  ,-3.5370746 , 1.0272335 ,-0.7654047 ,-1.977087  , 3.1920779 , 0.37378865, 4.016262  ,-3.3201067 ,-4.7767315 ,-3.5074112 ,-4.094166  , 1.6035818 , 1.6506963 ,-3.2142932 , 4.7714067 ,-1.7164946 
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..8c770f6
--- /dev/null
@@ -0,0 +1 @@
+-1.8028042 , 1.7280815 ,-3.0464594 ,-2.810487  , 0.582805  ,-1.786865  ,-1.7263526 ,-0.36871073, 3.3955328 ,-3.9523299 ,-1.880003  , 4.9068613 , 4.6292953 , 3.9778202 ,-1.859954  , 2.8149757 , 4.5020967 ,-4.160163  , 1.9295161 ,-1.2508658 , 0.5669804 , 0.99246883,-2.4829247 , 0.88920474,-3.7942843 , 2.4626305 , 4.3087935 , 3.0680852 , 3.0893688 , 3.1640174 ,-0.41890725, 0.5377459 ,-4.0344224 ,-4.5812287 , 0.5720303 , 1.802316  ,-0.31413126, 2.9586952 , 1.1723012 ,-4.696369  ,-3.7047153 ,-1.8109767 ,-3.6122723 , 1.2727392 , 4.4057164 , 3.8347735 ,-4.739083  , 2.4655118 , 0.45258832, 4.0693913 ,-3.3486447 ,-0.64714307, 1.4990507 , 2.771129  ,-0.6109979 ,-1.0617865 , 2.0837703 ,-1.633663  , 1.8431798 ,-4.3942385 , 4.8523426 , 1.1941985 , 3.0366988 , 4.7991366 
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..8a4c9eb
--- /dev/null
@@ -0,0 +1 @@
+-2.2375767 ,-1.1274278 , 0.18025301,-4.598087  , 1.1042122 , 3.1241179 , 1.9084688 ,-1.214722  , 4.596646  , 4.1969523 , 4.658112  , 3.143779  ,-2.6940444 ,-1.5482163 , 1.542811  ,-1.1338089 , 3.721594  , 0.24673286, 4.71102   , 2.7811737 , 1.171089  , 4.145586  ,-2.6335135 , 1.1190183 ,-3.7932637 ,-4.6548123 ,-3.10302   ,-3.392706  ,-3.856141  , 0.6618614 , 0.9668614 , 4.4293485 , 1.3193    , 4.983464  , 1.659716  ,-3.185926  , 4.8983006 , 1.6323217 , 0.18800464,-1.9328839 , 4.6031475 , 3.459718  , 4.128766  ,-3.4701612 ,-2.3796144 , 1.6752707 ,-3.6569223 , 2.922704  , 3.642789  ,-1.6817225 , 3.151759  ,-1.5401909 ,-3.8259532 , 2.4556105 ,-4.4989905 , 1.2779988 ,-0.62634754, 3.5827441 ,-0.82541114, 2.1539748 , 4.583461  , 1.2231985 ,-1.4457659 ,-2.9194565 
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..5110f86
--- /dev/null
@@ -0,0 +1 @@
+-4.011289  , 0.9077414 ,-2.8109396 ,-4.33598   ,-2.6516347 ,-3.917852  , 3.2461808 , 1.7588768 ,-1.9439132 , 2.190185  , 1.5180751 , 0.3587409 ,-4.3434815 ,-4.1376143 , 3.750847  , 1.5820616 , 0.03843357, 4.71235   , 1.0592757 ,-1.7640393 , 0.44547582, 2.8698466 , 4.5816092 , 4.6638517 , 1.4207541 , 1.863644  , 3.6007912 , 0.6800818 ,-2.4884489 , 3.0707197 , 3.3961668 ,-4.331953  , 2.7828538 ,-0.16146964,-4.9070745 ,-2.9787786 , 0.3337284 ,-3.935533  ,-3.303555  , 2.376896  ,-4.7058997 ,-2.2409894 , 0.07352693,-2.6024988 , 4.9593167 ,-4.7717366 , 1.6590588 , 4.063875  ,-3.8855767 , 2.6274624 , 4.901856  , 4.157007  ,-3.292969  , 3.579326  , 3.9860668 ,-3.0936542 ,-4.7793274 , 0.71697485,-2.0354068 ,-2.1414943 , 3.6339438 , 0.10732502,-0.86129206, 4.4152017 
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..1a4fc3e
--- /dev/null
@@ -0,0 +1 @@
+ 2.2145607 , 0.88045335, 0.45151594, 2.852104  , 3.191637  ,-0.4578638 , 1.4858874 ,-2.1207588 ,-0.77495986,-4.1637363 , 0.83028954,-3.9974387 ,-3.3348315 , 3.7137656 ,-2.9883633 , 3.4332464 , 3.7178712 , 3.5850213 , 0.9240786 ,-0.07091421,-4.516931  , 3.965739  ,-4.828566  , 3.860382  , 0.3243482 , 1.6835089 ,-1.4710085 ,-2.6625636 , 1.942659  , 0.12808529, 1.3640044 ,-3.0124736 ,-3.646485  , 1.6046281 , 1.1087954 ,-2.4648561 ,-2.3274968 , 1.2196178 , 3.0752547 , 1.8316921 ,-2.926682  ,-2.247648  , 4.1264873 , 4.700915  ,-0.6861696 , 3.5246365 ,-2.5577545 , 1.832533  ,-4.3125343 ,-2.8579648 , 3.5299218 ,-0.67911506, 0.86782926,-2.918562  ,-3.3644724 ,-2.0097935 , 0.3721956 ,-1.3528451 , 3.8267515 , 4.916677  , 3.2055025 ,-0.64435905, 3.877367  ,-1.830818  
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..09c06c7
--- /dev/null
@@ -0,0 +1 @@
+ 4.5410523 , 4.4007382 , 3.3252192 , 0.40420002,-4.7642856 , 2.0282986 , 2.32176   , 3.160375  ,-4.3348713 ,-2.324847  , 4.327631  , 3.253995  , 0.53624976,-4.4896946 , 4.0600896 , 2.697662  ,-3.0693228 ,-4.7954664 , 2.010163  , 4.5790668 , 0.00921074,-4.638007  ,-2.612561  , 4.338762  ,-1.3632652 ,-0.55081725, 4.273717  , 3.1074166 , 3.1386747 ,-4.033469  ,-0.7298752 ,-3.4973295 , 4.454913  ,-0.5148646 ,-2.4100194 , 2.7154703 , 4.1507893 , 2.3424785 ,-1.7028755 ,-2.6013496 ,-1.831555  ,-4.07971   ,-1.039077  ,-1.8733021 ,-3.885844  , 3.5691998 ,-3.8779395 ,-4.7566814 ,-3.570575  ,-3.0510366 ,-4.6841617 ,-4.751285  ,-2.9700782 , 3.4774506 ,-1.3150035 ,-3.6287053 , 2.2280993 , 4.502896  , 3.9448938 , 3.3926914 , 1.560589  , 3.3307595 , 2.6545596 , 2.0503757 
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..24b7a24
--- /dev/null
@@ -0,0 +1 @@
+ 4.5630627e+00,-4.5077333e+00, 6.8117022e-03,-1.1568142e-02, 2.3568916e+00,-2.9918964e+00,-4.8542055e-01, 4.7381549e+00, 3.1183126e+00,-2.6462586e+00, 3.0083582e+00, 1.4518642e-01,-2.4764729e+00,-4.8520207e+00,-4.8022575e+00,-1.8167463e-01,-3.1106927e+00,-2.4183941e+00,-4.1466684e+00,-3.6997426e+00,-3.9788694e+00,-3.0889416e+00,-2.2332447e+00, 1.8608164e+00, 2.8619974e+00,-3.6986623e+00,-1.3749057e+00,-9.2409855e-01, 2.7646086e+00,-3.3385031e+00, 7.6255083e-01, 1.0236104e+00,-1.7077237e+00,-4.4339476e+00,-1.1930060e+00,-1.7226344e+00,-3.1680160e+00,-1.8338548e+00,-2.6412952e+00,-8.2973856e-01, 4.2303777e+00, 3.4531716e-03,-3.3162324e+00, 8.4682000e-01, 2.5807633e+00, 2.7543969e+00, 6.8153429e-01, 4.7182851e+00, 4.2617507e+00,-1.4446728e+00,-4.3752551e+00, 3.5699592e+00, 9.6946698e-01,-2.0700858e+00, 2.0899124e+00, 1.6371955e+00,-9.5873147e-01, 3.1151581e+00, 2.9369416e+00, 4.4568644e+00,-9.4711387e-01,-4.1349549e+00, 3.3031983e+00, 4.1091359e-01
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..088eb62
--- /dev/null
@@ -0,0 +1 @@
+ 2.5168443 , 3.7492614 ,-3.7076504 , 0.49709523,-4.642194  , 1.8201847 ,-1.396746  ,-1.0660223 , 3.3333528 ,-1.7719259 ,-2.3515563 ,-2.0570705 ,-4.7125244 ,-1.593302  ,-2.1072757 ,-4.4396334 , 4.3185077 ,-2.7568438 ,-0.59535027,-3.9871383 ,-2.6216223 , 0.39957425,-1.3687986 ,-3.1157744 , 1.2557942 , 2.3428473 ,-4.906711  , 3.5663006 ,-0.46128616,-4.7818427 ,-0.8876555 , 2.5066485 ,-1.3254607 ,-3.6097736 , 1.2686944 ,-1.37061   , 4.762917  ,-3.489012  ,-2.7905307 ,-0.2612837 ,-3.3236315 , 0.8347171 , 2.5582032 , 0.42744452, 1.7428764 , 2.4122005 ,-3.6781132 , 2.8811646 ,-2.7060914 ,-0.4752588 , 0.44432116, 0.5011615 , 3.2550313 , 0.02670379, 2.6197197 ,-4.319786  ,-1.4056181 ,-3.3794782 , 0.66822946,-1.4262298 ,-0.2465175 ,-4.6432767 ,-3.580772  , 2.960096  
diff --git a/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/MaxPool2D_000_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..bb81294
--- /dev/null
@@ -0,0 +1 @@
+-4.9356976 , 3.9426446 ,-4.746647  , 2.3674695 , 0.54803735, 3.1911538 , 0.28858757, 0.4800329 , 2.0652595 ,-4.5046906 , 0.21695825,-0.17217463, 2.4329293 ,-1.2274694 ,-0.11534467,-2.096684  , 2.6882868 ,-2.5291932 , 0.56199783,-2.0743406 , 0.95846254, 4.004705  , 0.89853394, 2.9610496 , 2.9799032 , 1.5339601 ,-1.7136513 , 2.1797504 ,-4.2055335 , 1.5059681 , 3.0828342 ,-1.7946475 ,-2.7096524 , 3.1037905 , 0.75922704,-1.1446673 ,-2.084073  ,-1.2888353 ,-1.6958839 ,-0.8388285 ,-1.0279479 , 1.1291095 , 4.080411  , 3.6791847 , 0.9237894 ,-4.70821   , 0.5730598 ,-1.3565379 ,-2.7533107 ,-0.4583869 ,-1.4416862 ,-3.6039822 ,-1.1611387 ,-2.6919081 ,-0.6557734 ,-2.9248757 , 1.4998456 , 3.2239568 , 0.23668556,-3.4410136 ,-2.3170567 , 3.66808   , 1.9004405 , 4.3537745 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..182eb52
--- /dev/null
@@ -0,0 +1 @@
+ 3.4251418 , 1.8884782 ,-4.061519  ,-2.1329548 , 3.851976  , 3.668601  ,-0.7418167 , 2.379966  , 0.87259316,-3.96981   ,-4.627804  ,-3.3958297 , 3.025158  ,-1.299777  ,-4.322816  , 3.9173064 ,-0.55214256, 1.9224825 ,-4.8571157 ,-4.778045  , 3.3015614 , 0.56785774, 4.7985554 ,-0.4355816 , 4.9478025 , 1.7909397 ,-0.7620663 ,-0.09947702,-3.0230513 , 1.3817457 ,-4.5706887 ,-3.4097836 ,-4.7086477 ,-3.4651487 , 1.4401027 , 4.7513933 ,-1.0788624 ,-3.4946275 , 4.607974  ,-3.1215246 ,-1.4637078 ,-3.5266285 , 2.1268125 , 0.19458893, 4.058288  , 2.2452407 , 0.7575343 , 0.12213306, 4.885321  ,-1.2482406 ,-1.1034219 ,-4.054173  ,-3.6471267 , 4.774012  , 0.9450243 ,-2.5827825 ,-2.3991685 ,-2.8482654 , 0.9294943 ,-3.1165063 ,-1.6113516 , 0.04260086, 2.0987031 , 2.1601508 , 4.9740996 , 3.7719023 , 2.6817482 , 0.42131838,-1.4525859 ,-0.5124655 , 2.6313434 , 4.5606523 ,-4.6180778 , 4.788594  ,-0.8446551 ,-1.5460813 , 1.4288356 ,-1.9648911 ,-4.9766145 ,-2.405665  ,-0.30327383, 3.5204673 ,-3.848158  ,-2.6913974 ,-2.76141   , 4.336643  , 1.4205143 , 4.5898    ,-0.93183124, 4.2199287 ,-4.216924  ,-1.0979122 ,-2.3032405 ,-3.4457245 , 2.944412  , 2.137278  , 1.0326933 , 2.3116126 , 4.2138443 , 1.8283377 , 0.28901085,-1.8877143 , 0.50673705, 1.4360197 ,-2.924691  , 0.9819095 , 3.4656513 ,-2.541582  ,-1.9102442 , 3.3629627 ,-0.9675056 , 0.5937253 ,-2.4236617 ,-1.4193813 ,-0.7552614 ,-1.7121441 , 4.39647   ,-2.2712908 ,-4.3387337 , 1.5912663 , 0.8397044 , 0.17277755, 1.5272428 , 3.571715  ,-1.4471695 , 1.8623346 ,-4.3603377 , 1.2116091 , 4.960487  , 2.3681397 , 1.2925869 ,-4.3249073 , 2.4402251 ,-1.4506928 , 3.023616  ,-3.232099  ,-4.0106025 , 3.5774167 ,-0.6024932 , 1.0183483 ,-2.8215308 , 3.7395437 , 1.9100485 , 3.892712  , 4.6569633 ,-3.251774  ,-3.6923678 ,-4.8891983 ,-3.8605282 ,-4.0293036 ,-2.8199108 , 4.1668954 , 2.1569817 ,-2.9700332 ,-0.7035824 ,-0.5176811 ,-3.1826456 ,-3.334556  , 4.9103675 , 3.8513231 , 2.8609774 , 1.1845547 ,-1.4094447 ,-2.0445833 , 0.9833705 , 4.481276  , 
3.83006   , 4.6240997 ,-4.268881  ,-0.85518706,-2.2650888 , 4.032545  , 0.9495817 , 1.1353155 ,-4.6551876 ,-2.2839146 , 2.6291692 ,-3.0398533 , 0.52652216,-1.8323399 ,-0.12300313, 0.46178594, 1.120684  , 1.4657134 ,-1.9794375 , 0.08941289,-4.4573083 , 2.7112565 , 4.9227715 , 2.4938288 ,-0.37153494,-4.1604757 , 4.7694197 ,-1.3021677 , 2.454714  ,-2.4902875 ,-2.760436  , 0.05183195,-2.6723208 ,-1.1471758 ,-2.2565122 , 0.20876396,-0.7288584 , 0.4386669 , 0.7846054 , 2.7294593 ,-3.836883  , 2.7501638 ,-4.775067  ,-3.2403855 ,-2.0307286 ,-1.6403166 , 4.9471517 , 1.0428456 , 2.5126355 , 3.0090203 ,-2.3476288 ,-2.9215205 , 3.8079188 , 0.83959275, 4.2670302 , 1.2338712 , 2.7329903 , 2.2549257 , 4.882931  , 0.12783106,-2.4392028 ,-2.4590807 , 4.2874207 ,-0.08333418,-3.4244132 ,-0.2235516 ,-4.23632   ,-1.3970895 , 2.1245553 ,-2.513883  ,-2.8092728 ,-1.9194845 ,-4.1932216 ,-3.7431748 ,-1.1063433 ,-3.714845  , 1.7230242 ,-0.19162221, 1.1123114 , 3.937181  , 2.6165597 ,-0.61531806, 0.44309503,-2.9260228 ,-3.1617007 , 0.0663496 , 2.4541974 ,-2.714474  , 4.2564497 , 1.2300675 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..dd80372
--- /dev/null
@@ -0,0 +1 @@
+-4.8834    ,-4.6238756 , 2.020674  ,-2.3068821 , 3.7487323 ,-0.36079448, 0.08661745, 3.423143  , 3.3073757 ,-2.709357  , 4.4810205 , 3.4159606 , 4.1597505 ,-4.249789  , 2.3782206 ,-2.02848   , 0.90137833,-0.6249625 ,-3.5300052 ,-4.1113796 ,-3.768913  ,-3.59854   , 2.0896666 , 1.7677166 ,-2.3101497 ,-1.0116942 ,-3.7846713 , 2.4777756 , 3.413987  ,-2.1964507 , 0.08637846, 0.02552292,-1.9918599 , 0.7785565 ,-4.065995  , 0.8808776 ,-2.0446506 ,-1.8421272 , 0.42566776, 3.8834689 , 4.900111  ,-3.0617309 , 4.0613194 ,-3.3601153 , 3.678536  ,-4.1136184 ,-4.2903633 ,-2.6918027 , 3.4335177 ,-3.9272869 ,-1.6882807 ,-1.9629028 , 4.2125826 , 1.6536059 ,-1.1801353 , 4.8443203 , 2.9393198 , 0.4306524 , 4.390743  ,-4.6322317 , 2.932263  , 4.140538  , 2.7385068 , 2.620753  , 2.0725663 ,-1.3642436 ,-0.48539641,-4.2409816 ,-1.5950899 ,-1.688442  , 4.4769464 ,-1.25038   , 3.462903  , 0.5011836 , 0.981037  , 0.63532305,-3.4727957 , 4.6721544 ,-3.481392  , 2.8904114 ,-1.7057139 , 1.0501702 , 3.0799537 , 1.6698593 ,-1.3895478 , 4.487443  , 2.5352533 ,-0.19357985, 0.78166926, 3.5892236 ,-4.3259463 , 2.8381345 , 1.3652785 ,-0.40142608,-0.62102544,-3.088937  ,-4.0266094 , 4.7095647 , 2.0513067 ,-1.8115149 , 0.11062156,-4.5980725 , 2.809295  , 4.2042894 ,-3.4689455 ,-1.3418434 , 2.9026117 ,-1.6125411 , 2.153075  ,-3.4445221 , 3.4869678 , 1.8746428 , 0.8482056 , 3.0525062 , 1.715966  , 1.7684505 ,-2.0022326 ,-4.3427444 ,-3.1659825 , 1.6855526 , 3.1612136 , 2.0646648 ,-3.972224  ,-2.91726   ,-3.5450957 ,-2.7226381 ,-0.3273488 ,-2.5905557 , 3.6621993 ,-4.3285728 ,-0.6200474 , 0.08522832,-2.1981175 ,-3.4179437 , 2.5989106 ,-0.8503352 ,-3.3723786 , 3.9595454 ,-0.5431398 ,-2.6962373 , 1.9689399 ,-2.8925    ,-1.2064192 , 1.606632  , 2.2728612 ,-0.1403075 ,-4.8031726 , 0.1549256 ,-1.3698703 , 0.78889227,-2.286554  , 0.96417916,-0.10438658,-3.8131578 , 2.9322996 , 2.4103441 , 4.4864798 , 0.02176606,-1.1966147 ,-3.6921146 , 4.943659  ,-1.0050472 ,-1.2238564 ,-4.5758605 ,-2.6865735 , 1.7294792 , 
4.180183  , 3.157911  ,-3.581904  ,-2.9112866 , 4.1674094 , 3.2326035 ,-2.7883985 ,-0.09154221, 0.8667318 ,-4.532571  , 0.816668  , 3.1307516 ,-4.1993947 ,-1.0503744 , 0.123965  , 0.17691068,-3.1465137 ,-1.4964765 , 3.4077635 ,-0.35415363, 1.9092371 ,-4.709203  , 1.148622  , 4.4766874 ,-2.193539  ,-3.7959206 , 1.4420112 ,-2.5300896 , 4.107192  , 3.4666913 ,-2.1158516 ,-3.182484  ,-2.8406513 ,-1.9396024 ,-2.3695247 , 3.8301885 ,-1.5032169 ,-0.48879272, 0.41695955,-1.1829228 , 4.822825  ,-2.9244933 ,-3.8178608 , 2.7742817 , 2.6998327 ,-3.1187122 , 2.508593  , 1.2989064 , 2.3436947 ,-0.39074868,-3.034766  ,-1.8690065 , 4.850296  ,-2.4549792 , 4.839528  , 2.2758777 , 2.6689568 , 3.2014422 , 3.6975234 ,-3.2566156 , 3.546554  , 1.9570364 ,-2.753807  , 2.3366053 ,-4.357898  , 4.9184504 ,-1.0057111 ,-3.8582199 , 1.2416974 , 4.355522  ,-2.7863925 , 0.4679685 , 2.6850772 , 2.9984746 , 2.434312  , 2.9931593 , 2.2637212 ,-0.18371914,-4.07688   ,-2.0402577 , 0.5173147 , 0.19596666, 4.71653   , 4.291663  ,-3.3575501 ,-1.0857964 ,-0.16504912, 3.6683955 , 2.9581416 ,-1.354989  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..1295bfd
--- /dev/null
@@ -0,0 +1 @@
+ 1.2340723 ,-1.7371651 , 4.271641  ,-2.3332376 , 0.82301813,-3.4199295 ,-0.75806665,-2.2647665 , 2.613749  , 2.2658496 ,-2.1277714 ,-0.465433  ,-0.1323059 ,-1.9658507 ,-4.7780223 ,-4.392719  ,-0.81063855,-3.639001  ,-3.6398284 , 4.6309023 ,-0.17483327, 1.7921627 ,-1.1493484 ,-3.8145075 , 2.2367268 ,-0.40209827,-1.4159911 , 2.3032134 ,-4.154446  , 1.6760192 , 2.3430173 ,-1.386683  , 3.3363335 ,-2.976934  , 3.3983    ,-0.0069695 , 3.7025425 ,-1.8683758 , 0.72029626, 2.7558882 ,-4.4060984 , 2.553126  ,-3.5888321 , 1.8549582 ,-0.52258795, 4.6549897 , 0.8886988 ,-3.0400214 ,-3.6890693 , 3.6663766 ,-4.8026586 , 1.0636287 ,-2.9774907 , 0.39021772,-4.2414255 , 2.914968  ,-0.24334456,-4.0344954 ,-1.1011956 ,-3.8205252 , 0.05693521,-4.1379023 , 1.0584197 ,-4.0404034 , 4.841462  ,-1.2727845 , 2.6974225 ,-4.2507453 ,-2.7101111 ,-2.9800036 , 0.3082796 , 3.6763537 , 2.3277721 ,-4.9667864 ,-2.4498677 , 0.2704629 , 3.006634  ,-1.1129389 , 4.373073  ,-1.2066779 ,-3.1575904 ,-2.721046  ,-0.861226  , 1.7315729 , 2.255666  , 2.5448847 , 3.1268334 , 1.5189171 ,-3.1992466 , 0.607633  , 4.0749955 , 1.2546133 ,-1.5335796 ,-1.6200712 ,-3.9392874 , 1.053699  ,-0.87970537,-3.9218261 ,-2.2724128 , 0.82235074,-2.3400521 , 3.6467028 , 1.6891364 ,-1.6333519 , 2.2639709 ,-0.08272895,-3.076964  , 3.731091  , 3.7932968 , 2.496441  ,-4.12142   ,-2.0908666 ,-4.994248  ,-0.0429902 ,-4.6083336 ,-4.522535  , 4.717733  , 1.6715643 ,-4.779822  , 1.2919815 ,-4.6121325 ,-0.6206874 ,-2.6633883 ,-1.9632595 ,-3.2203329 ,-0.6556523 , 1.3083993 , 0.13287744, 4.599294  ,-1.1777852 ,-2.9159715 ,-0.25669238, 0.48217958,-3.9736347 ,-0.774503  ,-0.7264863 ,-3.0058725 ,-2.1682055 , 2.6579158 ,-4.4020653 , 3.0450368 , 1.3798735 ,-4.9858127 ,-4.5812607 ,-3.7349749 ,-4.4158583 , 1.631093  ,-3.0769646 ,-3.8406906 , 1.6544044 , 0.36895755,-1.8196682 ,-2.0880237 ,-3.708266  ,-2.0277069 , 1.0536597 ,-3.6726243 , 1.1704421 , 2.3201573 , 1.4994124 , 4.0197086 , 2.1001272 ,-0.39845964, 4.879206  ,-4.6042013 , 4.367211  , 
2.2712052 , 2.7754369 ,-3.156667  , 4.349216  ,-4.111492  , 1.0267047 ,-2.3381946 , 4.8876834 , 4.876814  ,-0.28538027, 4.8861    ,-0.95963717, 0.46279734,-4.5789995 , 0.26168647,-0.8879058 , 2.4468584 , 1.3030591 , 3.7261188 , 3.9933589 , 2.4964094 ,-1.3851117 , 0.7147012 ,-3.8367457 , 0.79737735,-0.5907085 , 4.317288  , 0.7659837 ,-4.821792  ,-1.466433  ,-1.147227  ,-1.8638811 , 2.5115767 , 1.9449657 ,-2.4122007 ,-2.4968379 , 0.7738737 ,-1.4761454 , 4.131583  , 0.4211128 ,-2.4312468 ,-1.9722428 , 2.2810268 , 4.950381  ,-0.0406047 , 4.67312   , 0.66613483,-0.28880936, 3.2917845 , 1.6225572 , 4.809879  , 0.48241946,-3.654634  , 0.68542016, 1.3973923 , 3.479005  ,-1.4296091 , 0.64391786,-4.0887494 ,-2.186845  ,-4.5834355 ,-0.67726034, 2.4158256 ,-2.4787726 , 0.4353257 , 2.9205139 , 0.10488439, 2.0790074 ,-4.5518365 ,-3.3856661 , 3.940736  ,-1.7141095 ,-4.8946457 , 1.1085542 , 3.785141  ,-2.4175835 , 3.7720537 , 4.623048  , 2.2239215 , 0.11616404, 0.09229392,-3.637964  ,-2.334849  ,-0.95000714,-2.1338253 , 3.2281857 ,-4.0220475 , 4.7304025 ,-1.8075961 , 0.2428817 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..378b5fe
--- /dev/null
@@ -0,0 +1 @@
+ 2.4605505 ,-2.7001262 ,-4.3874917 ,-2.9867616 ,-3.4332    , 0.76675916, 3.4377892 ,-0.6712793 , 1.8018581 , 1.8148962 , 2.0353577 ,-4.766427  , 3.2487285 , 3.886249  ,-2.8867183 ,-0.7906634 ,-4.376028  ,-4.2085958 ,-0.36025277, 0.6360799 ,-4.687723  , 4.8313313 , 3.3582768 , 2.1117954 , 0.9821817 , 3.3697798 ,-1.1784939 ,-3.1590316 ,-0.24019621, 0.20640443, 1.2808957 , 2.3346424 , 2.13951   , 0.61864626, 2.4020443 ,-1.9671458 ,-1.6852348 , 0.32225233,-2.3928862 ,-4.173372  ,-2.282281  ,-1.271318  , 3.0839682 ,-4.4726086 ,-0.635177  , 3.2710915 , 3.08071   ,-0.7311931 , 2.1444874 , 0.4102332 ,-3.332888  ,-4.8965516 , 3.903695  , 1.4920163 ,-4.041926  ,-0.3941788 , 3.6352818 ,-2.098405  ,-0.9248165 , 2.6277795 , 3.225142  ,-1.4461963 ,-4.2050753 ,-0.2213572 , 1.9704323 , 3.298732  ,-4.710403  , 3.6876736 , 2.0771818 , 1.3559113 , 1.328373  ,-4.4079022 ,-3.28067   , 3.8852313 , 2.322237  , 2.3243637 ,-1.9126451 , 4.6277676 , 1.7031307 , 0.74861574,-4.688967  , 3.9351206 ,-1.8054084 , 1.5824287 , 3.5381088 , 2.4798677 ,-3.3099444 ,-3.8518245 , 1.5562242 ,-1.9466928 , 0.08375791,-0.16754703, 2.9265418 ,-1.6599798 , 2.766202  ,-2.8269696 ,-0.19389874, 2.0869334 ,-1.5073173 ,-3.2024453 ,-3.6522708 ,-4.588111  ,-2.3425827 , 4.8709297 ,-1.4231887 , 1.0590451 ,-1.6406479 , 0.37192422, 0.7313186 , 0.3865313 ,-4.2832613 , 3.9712496 , 0.07653506, 0.2593589 ,-2.6036396 ,-0.45185068, 3.6537335 ,-0.6341783 ,-0.6381408 ,-1.0992868 , 2.766365  , 4.666631  , 4.416099  ,-3.6654727 ,-4.0626607 ,-3.4928396 ,-0.6944366 , 4.869798  , 4.2240977 , 0.9655519 ,-2.5654511 , 1.3396966 ,-3.7639391 ,-1.2369057 ,-3.7242758 ,-0.5189227 , 1.6548159 ,-2.6197302 , 4.2732763 , 2.239486  ,-4.316255  , 3.2419755 ,-1.9283817 , 0.22489135, 2.6034477 , 0.15818155, 2.0811818 , 0.836994  , 2.7832468 ,-0.68581384, 0.89475006,-3.1455147 ,-4.818614  ,-4.1738377 , 0.4281551 ,-2.935886  ,-3.7582467 , 0.58168256, 0.2854076 , 1.0492616 , 2.2415884 ,-4.4923434 ,-3.2479804 , 3.8439462 , 3.9802108 ,-0.9027783 , 
1.7783072 ,-2.2782066 , 4.4638705 , 4.28735   , 4.291463  , 1.1685107 , 1.2765578 ,-3.7954235 ,-3.494621  , 4.4340134 ,-3.5995178 ,-4.3025713 , 3.3037348 ,-3.6675146 ,-1.7871013 ,-1.2922373 , 0.72924066,-4.7065907 , 2.1388702 , 2.3570008 , 3.9203117 , 0.07483537,-2.8389792 ,-1.795164  ,-4.380931  , 1.3189598 , 2.4404252 , 4.4774084 ,-1.2798066 ,-4.95842   , 1.8095461 , 4.2692375 ,-2.0918155 , 0.33083543,-3.794544  , 1.4940621 ,-3.9446015 ,-0.38208306, 0.30863285,-0.6832849 ,-2.5675633 ,-4.948772  , 1.5904989 , 3.0415509 ,-4.899339  , 0.9415345 ,-0.91124976, 4.4849253 ,-3.4605968 , 1.6737833 , 1.9091597 , 1.3111106 , 2.0829957 ,-2.1308084 ,-2.912219  , 1.1306196 , 2.231948  , 4.7522073 ,-2.1438766 ,-2.1000512 ,-0.2984778 ,-1.2093959 , 2.6259391 , 1.8113437 ,-4.137133  , 2.716111  , 3.4318748 ,-0.89123845,-3.70718   , 2.453927  ,-0.22418758,-3.098459  ,-4.4986243 , 0.85048616, 2.8023102 , 3.743153  , 0.9931644 , 3.8588202 , 1.7585737 ,-4.2855363 ,-2.5475764 ,-0.83141845,-1.9358089 , 3.1711586 , 2.4221613 ,-1.881327  ,-3.7230873 ,-4.55259   ,-0.42294836, 4.64625   
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..3394354
--- /dev/null
@@ -0,0 +1 @@
+-3.37344313e+00, 2.78325319e+00,-7.30300546e-01, 1.33456266e+00, 3.96648932e+00, 4.33421373e+00,-3.11558557e+00,-3.64659280e-02,-1.73589993e+00, 4.81018400e+00,-8.32905114e-01, 2.33330703e+00, 1.85830116e+00,-4.60395622e+00, 5.26070774e-01,-4.71355534e+00,-2.97202754e+00, 3.57638383e+00, 4.50985909e+00, 2.08423686e+00,-1.85349309e+00,-2.18306184e+00,-4.65403509e+00, 4.31280661e+00, 1.16069472e+00,-4.85344124e+00, 8.40563923e-02,-1.98723459e+00,-4.29561710e+00,-2.57372570e+00,-4.22641230e+00,-4.00811911e+00,-9.61861551e-01,-2.14665198e+00, 4.18120289e+00,-3.87826174e-01,-2.86187083e-01,-4.84979200e+00,-1.34733701e+00, 1.27489030e+00, 1.98844969e+00,-4.11230135e+00,-1.61191213e+00, 2.63515592e+00, 4.35539484e+00,-1.56582773e+00,-2.45283508e+00, 1.44556177e+00,-8.56053472e-01, 3.25111747e+00, 3.58699083e+00,-2.47732449e+00, 3.64130282e+00,-4.91288567e+00, 8.97059917e-01,-2.26010180e+00, 4.91831064e+00, 4.45047706e-01, 1.88655663e+00, 3.20642543e+00, 1.38243341e+00, 9.06112790e-01, 1.15262544e+00,-2.39862514e+00,-2.87477684e+00, 7.36831248e-01, 3.18799114e+00, 1.22698748e+00, 5.63625395e-01, 1.29130912e+00,-4.89572334e+00, 2.11258578e+00,-4.55420208e+00, 4.94569272e-01,-7.08617330e-01,-1.84863120e-01,-4.81965256e+00,-1.06512284e+00, 4.79633398e-02, 2.70429182e+00, 4.78289175e+00,-2.11806059e+00, 4.23046875e+00, 3.18022132e+00,-8.39496255e-01, 3.13150501e+00,-3.24103773e-01,-7.48505890e-01,-2.45754886e+00, 4.16639376e+00, 3.25864077e+00, 3.40006447e+00,-3.77217412e+00, 2.93266010e+00, 3.33685803e+00, 1.02347994e+00,-2.22839618e+00,-1.90375733e+00, 3.24283957e+00,-4.01684284e-01,-4.45417643e+00, 3.74440104e-01, 3.33520865e+00, 6.64106190e-01, 3.84395885e+00, 2.38586918e-01,-1.51634857e-01,-2.64977455e+00,-3.45786500e+00, 4.89002228e+00,-1.07323432e+00,-2.92749858e+00,-1.76510501e+00,-3.44604325e+00,-1.89681911e+00, 4.20239258e+00,-1.75864971e+00, 2.13181686e+00, 3.90355319e-01,-4.11911535e+00, 6.61891177e-02,-4.32988214e+00,-1.42876351e+00, 
3.12163901e+00,-4.56227779e+00, 4.17938662e+00, 9.63881195e-01, 4.35952139e+00, 1.61931109e+00, 4.11196423e+00, 2.25612569e+00,-4.77538586e+00,-1.72600198e+00,-4.39411783e+00,-8.98730099e-01,-1.04562032e+00,-2.81517529e+00, 3.57167959e+00, 1.90318239e+00, 2.17302442e+00,-3.79942179e+00, 2.19838643e+00,-4.16209459e+00, 4.45025682e+00, 1.68786839e-01,-2.56879544e+00, 3.60925221e+00, 1.06542781e-01,-3.48755455e+00,-6.77028894e-01,-3.51582170e+00, 3.90697241e+00, 4.49116230e+00,-1.56180394e+00, 4.96249914e+00, 9.63374436e-01, 2.72304177e+00, 8.38046610e-01,-2.91993833e+00,-9.41783428e-01, 8.00800502e-01, 3.89176035e+00, 6.70560122e-01, 2.76782703e+00,-1.37075472e+00,-3.25303817e+00,-4.41226482e+00,-8.38777184e-01, 1.73568249e+00,-1.09438455e+00,-1.08815920e+00, 1.06787062e+00, 2.04415274e+00,-2.93027782e+00,-6.86941504e-01, 3.83109421e-01,-3.49270535e+00,-2.13225913e+00,-3.61786675e+00, 1.32213378e+00,-2.89654016e+00, 4.23944092e+00, 4.53665400e+00, 4.26081800e+00,-1.95718706e+00, 4.72295076e-01,-3.08592963e+00, 2.53354859e+00, 3.80069661e+00,-1.14408419e-01, 2.39438844e+00,-4.73618507e+00, 2.35079074e+00,-1.43686843e+00, 1.32946157e+00, 1.10381134e-01,-3.49878430e+00, 2.83181930e+00, 4.57872486e+00, 2.29953095e-01, 7.19881415e-01,-2.97208834e+00, 4.11286211e+00,-3.89149117e+00, 3.83631349e+00, 4.14627981e+00,-1.14082299e-01,-6.89825296e-01,-2.55468488e+00,-4.04466152e+00, 9.95541453e-01,-2.59181118e+00,-4.60567427e+00,-4.77339029e+00,-7.36041367e-02, 1.85957468e+00,-3.42530179e+00, 4.55782986e+00,-3.29603004e+00, 3.55632234e+00, 2.40858841e+00,-2.07399082e+00,-3.96705031e+00, 4.41718817e+00, 3.19581985e+00,-3.72379017e+00,-3.76826024e+00, 6.79764748e-01,-4.43838930e+00, 2.29627752e+00, 2.34923697e+00,-4.23308420e+00, 3.80186272e+00, 8.65862250e-01, 8.44927967e-01,-1.05974531e+00, 4.70531940e+00, 1.25060010e+00, 4.82314730e+00,-4.53093815e+00, 4.51410580e+00, 4.95166332e-01,-3.45584202e+00, 1.82002666e-03,-3.27616286e+00,-2.68104935e+00, 2.39554620e+00, 
2.99364328e+00,-2.57998848e+00,-4.35891914e+00, 4.64737415e+00,-5.74958742e-01, 6.47293210e-01, 1.85961032e+00, 4.49567413e+00,-4.36166048e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..e0e52c3
--- /dev/null
@@ -0,0 +1 @@
+ 4.5734663 , 3.96675   ,-2.7826853 , 4.377681  , 1.8424977 ,-2.8312624 , 0.65628445,-3.7023883 ,-1.8941027 , 0.53154576,-3.9718776 ,-3.3961854 ,-2.7500536 , 2.6793208 , 3.3515985 , 2.0939343 ,-4.3965416 ,-1.7462187 , 0.5660886 , 4.497879  ,-2.2529721 ,-4.8996797 ,-0.00740948,-2.941367  , 1.9482567 ,-2.462802  ,-0.7897884 , 3.1501546 , 3.1216884 ,-3.506249  , 2.871302  ,-3.964653  ,-0.40679944, 2.8930066 ,-4.783338  ,-1.8733944 , 2.2654383 ,-0.41361305,-3.7790897 ,-1.9458629 ,-2.274427  ,-2.9192872 ,-0.73215395, 2.8135974 , 2.1402152 , 4.516366  , 1.58816   ,-4.607831  ,-3.5409598 , 1.9784997 , 3.11111   , 1.0872442 ,-3.6907403 ,-4.774325  ,-4.9267297 , 1.2962086 , 2.4646177 , 2.2726526 , 4.8766675 ,-2.9272413 ,-0.06221364,-0.80498594,-2.319938  ,-3.8261194 ,-2.3452706 , 2.5408983 ,-0.80628425,-1.4547366 ,-4.4171157 , 3.1584027 , 4.2213454 , 3.0342784 , 2.0285478 , 3.4517126 , 1.870827  , 2.812075  , 1.0776864 ,-4.524331  , 3.1467574 ,-2.366355  ,-4.7368546 , 1.940347  , 4.282059  , 1.2666475 ,-4.9559174 , 2.8177614 , 1.1941892 ,-0.25412267,-2.833778  , 1.1770393 , 4.9503546 , 4.582686  ,-1.0778978 ,-2.9030416 , 3.2517505 , 1.556093  ,-3.7605543 , 0.5915735 ,-2.6323159 , 4.596147  ,-0.90292877, 2.8230112 , 4.9295835 , 3.523853  , 1.7742149 ,-2.6014073 , 2.162894  , 1.9364033 , 4.0920115 , 0.81613404, 2.4198878 ,-0.907447  ,-4.79113   ,-3.4193892 ,-0.3334577 ,-1.0439668 , 4.2233415 , 1.4482704 , 1.3646252 ,-0.9206041 , 4.4994802 ,-4.2411633 , 0.6763335 ,-1.3827848 , 1.8579848 , 1.6426222 , 0.904467  , 3.876264  ,-4.6476808 , 4.576801  ,-1.4680524 , 2.441134  , 3.2343059 , 0.23119794, 2.5640545 ,-0.7293438 , 3.7184558 ,-1.6056752 , 3.1490617 , 4.6837263 , 4.7100887 ,-2.785927  ,-0.1520597 ,-1.9914767 ,-4.00598   ,-2.7502792 , 3.7857378 , 2.8444788 , 4.9911737 , 0.29277426,-4.779576  , 3.223367  , 1.3517398 , 4.8757277 , 3.8083189 , 1.7660266 ,-2.1543872 , 4.822371  , 2.089687  ,-4.7373757 ,-2.4061642 , 2.0387447 ,-4.067881  ,-3.1757388 , 
0.24974413,-0.24441184,-0.1168329 ,-0.35149318, 2.0035832 ,-4.248678  ,-1.4723817 , 3.8218668 ,-2.8085105 , 4.6995482 ,-3.0093114 ,-3.648268  ,-1.0374364 , 0.04459473, 2.3945484 ,-0.63439727, 3.3920286 , 2.403765  , 1.303556  , 3.232244  ,-0.44932058, 0.9601637 ,-3.3821623 ,-4.257736  ,-4.095783  , 0.42818338,-4.925627  ,-1.8419602 , 4.9393196 , 0.8049334 , 4.431875  , 2.8487725 , 2.1205912 , 1.7367444 ,-4.337498  ,-3.574642  ,-3.8927085 ,-0.35219863, 2.8415039 ,-0.2887568 ,-0.89806557, 2.669602  , 4.8017626 , 4.278042  ,-1.2604581 , 3.152027  , 2.1625066 , 1.5039738 ,-3.7209976 ,-0.72354925, 4.006067  ,-3.7651584 , 0.7198826 , 3.9594896 , 0.6228397 , 2.8464649 ,-0.18740664,-2.0530953 , 3.5185826 , 2.5037062 , 0.3990585 ,-4.423475  , 4.6931167 ,-1.0078553 , 0.74727917,-4.289701  , 1.697721  , 3.4963684 , 1.5796075 , 2.296678  ,-2.9379995 , 4.4748416 , 0.25155628, 4.1183267 , 0.9506131 , 1.2903908 ,-4.6828184 ,-2.309908  ,-4.2793307 ,-2.2069294 ,-4.038367  , 4.641971  ,-2.3178709 ,-2.2683682 ,-0.96986157, 2.6649144 , 2.3106637 ,-1.8052462 ,-4.9433284 , 1.7941002 , 4.80127   ,-0.06690114
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..9a8f222
--- /dev/null
@@ -0,0 +1 @@
+ 2.2282960e+00, 1.0135865e+00,-4.1930809e+00, 5.3674412e-01,-3.2516165e+00, 1.2745492e+00, 4.2867136e+00, 1.9524460e+00,-3.6757104e+00,-3.6086998e+00,-9.4525421e-01,-3.4005399e+00, 3.3607626e+00, 4.2363039e-01,-2.5177178e+00,-3.0130227e+00,-4.1442380e+00, 4.4951862e-01,-6.4387190e-01, 4.3701029e+00,-3.6790867e+00, 3.2749624e+00,-2.2554400e+00, 1.8269253e+00, 1.8358005e+00,-6.0994375e-01, 3.5964453e+00, 4.8953295e+00,-2.6134133e+00,-3.9301482e-01, 4.0286818e+00,-8.9392501e-01, 2.6430035e+00,-1.0339550e+00,-4.2311502e+00, 5.1657695e-01,-3.0095081e+00,-3.2156844e+00, 3.0075660e+00,-2.4905038e+00, 2.2380588e+00, 4.6933036e+00,-2.7880669e+00,-3.3672907e+00, 2.5187421e+00, 2.1843061e+00,-3.9957666e+00,-4.5409918e+00,-1.7282218e+00,-4.6849327e+00, 3.1863580e+00, 2.4342964e+00,-4.5180349e+00,-2.4310455e+00,-2.6789901e+00,-1.6438740e+00, 4.9613748e+00,-3.7800386e+00,-4.4277740e+00, 1.0571244e+00,-3.3765689e-02,-6.2219787e-01, 2.1075857e+00,-2.0555353e+00, 2.6996508e+00,-3.0303302e+00,-3.8262250e+00,-4.5048919e-01, 2.6760142e+00, 3.2696848e+00, 2.8136756e+00,-2.7064829e+00, 8.5861349e-01,-1.8871003e+00,-9.5355767e-01, 2.3704410e+00, 4.8897211e-02,-4.6371531e+00, 1.5693765e+00, 3.7866819e+00,-2.9738419e+00, 1.2106347e+00,-5.8760280e-03,-6.4124316e-01, 4.2396611e-01, 4.8550687e+00,-3.0650468e+00,-1.2087260e+00,-2.4833875e+00, 2.1272743e+00,-1.8991195e-01,-3.5372739e+00,-2.3402226e+00,-1.0234243e+00, 2.8981063e+00, 8.7964945e-02, 3.2136328e+00,-3.4051507e+00,-4.5538807e+00,-4.0228786e+00,-1.8993270e-01,-4.5704255e+00, 1.8850164e+00, 9.9910229e-01,-4.8424377e+00,-3.1492932e+00, 2.3922281e+00, 4.8503261e+00,-2.1037047e+00, 3.3602579e+00, 1.3546667e+00, 1.3481154e+00,-2.3604252e+00,-1.3253393e+00,-3.5330158e-01,-2.1313765e+00, 3.1442962e+00,-1.1570807e+00,-4.5890884e+00,-4.1608801e+00, 1.8554245e+00, 2.4646142e+00,-1.8453486e+00, 3.3489871e+00,-1.1248070e+00, 3.1451607e+00,-1.4458319e+00,-2.2727523e+00,-2.0378258e+00, 2.4566815e+00, 3.8839689e-01, 4.2570353e+00, 2.3613093e+00, 
1.2956337e+00,-7.5734973e-01,-1.4549307e+00, 9.3240172e-01, 4.3444591e+00,-6.4935732e-01, 2.5328317e+00,-2.3545196e+00,-4.7553263e+00, 2.6134777e+00,-2.5526178e+00,-1.7996631e+00,-2.0215256e+00,-4.6141486e+00,-1.7283168e+00, 2.5297335e-01, 3.7009020e+00,-1.9858284e+00,-3.4631619e+00,-1.5858738e+00,-2.5620985e+00, 3.2822473e+00,-3.2632313e+00,-9.0714562e-01,-2.3562717e+00, 4.4088845e+00,-3.6630182e+00, 5.5761892e-01, 1.6045070e+00,-3.6806375e-01, 4.3184443e+00,-1.3219705e+00, 1.5496376e+00,-1.5801797e+00, 2.1545045e+00,-4.0106788e+00, 3.4172714e+00,-4.2495294e+00,-6.1115064e-03,-7.2607052e-01,-7.3130745e-01,-4.4462271e+00, 4.8119636e+00,-4.7460346e+00,-3.0464313e+00,-2.8801811e+00,-1.4347218e-03, 4.4133449e+00,-3.3173063e-01, 4.3802023e+00, 2.6040417e-01,-2.5531218e+00, 3.7436140e+00,-4.1636271e+00,-3.3907690e+00,-1.4418361e+00,-3.6933661e+00,-2.6342602e+00,-3.1492887e+00,-5.5590755e-01,-1.6814464e-01,-1.0868104e+00, 4.9451909e+00, 3.4104226e+00, 1.0342516e+00, 4.7993002e+00, 1.2480364e-01, 1.6109833e-01, 2.6366503e+00, 1.6535910e+00, 4.3810592e+00, 4.4755011e+00, 4.3265424e+00,-3.1934264e-01, 9.8549920e-01, 1.9962710e-01, 2.8525822e+00,-3.7352023e+00,-1.3402178e+00, 2.5931063e+00,-2.6708813e+00,-7.6831090e-01, 3.0769660e+00, 1.4107993e+00,-1.8936746e+00,-4.7568636e+00,-1.9222193e+00, 4.7693071e+00, 2.8644614e+00, 4.1877995e+00,-3.6974251e+00, 4.5314616e-01,-7.1986055e-01, 4.8653622e+00, 1.4722897e+00,-8.6220115e-01,-4.1846976e+00, 3.7767217e+00, 3.7630556e+00,-4.5851058e-01,-4.9183292e+00,-1.8750135e+00, 1.0773923e+00,-5.2709883e-01,-9.2767686e-01,-1.3984675e+00,-2.0892789e+00,-4.3801632e+00, 4.0080590e+00, 4.2269025e+00,-1.2195336e+00,-2.2649438e+00, 4.6874623e+00,-3.8354571e+00, 5.9588730e-01,-2.8315885e+00, 3.0605823e-01, 2.1416895e+00, 1.6045133e+00,-3.3075256e+00, 4.9898911e+00, 1.7708080e-02, 3.5305614e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..1b2e334
--- /dev/null
@@ -0,0 +1 @@
+ 1.9229428 , 2.1045275 , 2.0514195 , 1.7149676 ,-4.1647053 , 4.3958654 , 2.1192055 ,-2.4357705 , 2.249189  , 4.7986865 ,-1.0146881 , 2.5108647 , 0.7262246 ,-2.3110187 ,-0.434008  , 2.6220334 , 1.3261455 ,-2.0402927 , 0.6362597 , 0.12827367, 0.94167644, 1.6396433 , 2.802215  , 0.92637545,-2.8669958 , 2.1684341 , 4.7197456 ,-3.0393784 ,-1.5588902 ,-1.5589788 ,-1.2792847 ,-4.301159  , 3.6853306 , 3.5522077 ,-3.5120559 , 3.6523628 , 0.52381915,-4.3210206 , 3.1021209 ,-4.4059095 , 4.574733  ,-3.708168  ,-3.4609973 , 0.04494883, 4.6041393 , 4.6209555 ,-2.184693  , 3.3114836 , 4.0440845 ,-4.362543  ,-3.0185041 ,-3.4911432 ,-1.0443465 ,-3.1546419 ,-3.0831194 ,-1.8959469 ,-3.7653599 ,-1.8753844 , 3.969308  , 4.0960746 , 0.256032  ,-0.11065102, 4.753394  , 4.8433857 , 0.17249103, 0.44612473, 3.5996687 ,-3.7071083 , 4.15448   , 2.7609568 , 0.7979912 , 2.6985793 , 0.24981445,-0.7343978 ,-3.8946455 ,-3.4738345 ,-2.0124238 , 4.6603985 , 0.9002829 ,-2.2128618 ,-0.8752893 ,-3.0990481 , 2.770291  ,-1.4642559 , 0.4561498 , 0.5808671 , 2.4227936 ,-2.400878  , 0.6494001 , 1.0195295 ,-3.2693145 , 1.9889433 , 3.5208216 , 3.6280289 , 4.322899  ,-2.805155  , 3.7704606 , 0.6797415 , 4.442675  ,-0.5069875 , 1.3373847 , 4.6953626 ,-0.7946793 ,-2.7352958 ,-1.9969261 , 0.43059692, 2.50853   , 1.9314603 , 1.3780333 , 2.0536468 ,-1.572231  ,-4.5323825 ,-1.3175989 ,-1.5515776 ,-0.05870355, 0.32408538,-4.2935586 ,-1.561555  ,-1.7551405 ,-0.93950266, 3.2540953 ,-4.623753  ,-3.4944966 ,-0.7603045 , 0.76591074,-4.9114766 ,-2.679303  , 0.12950227, 4.094419  , 4.781908  ,-3.6946337 , 2.766349  ,-0.45678583,-2.275264  , 2.0858452 , 3.1182098 ,-1.2942638 , 4.4418044 , 2.2264028 ,-3.3838644 , 1.4427853 , 3.7365992 ,-1.1815038 , 1.4555137 , 0.22728541,-0.18817298, 3.454521  , 3.1835914 , 4.0786743 ,-1.5111316 , 1.1560454 ,-0.04693017, 0.44183066,-0.7420173 ,-1.2243766 , 3.4453049 ,-2.969513  ,-0.82397145, 4.870895  , 3.0178127 , 1.7217305 , 4.482936  , 1.9468685 , 3.9970267 , 4.7294793 , 2.9921744 , 
4.470473  , 4.7626653 , 0.13104612,-4.651569  , 2.7991815 ,-4.734433  ,-2.4499187 , 1.0739365 ,-1.5583646 , 3.6531756 , 2.7731194 ,-4.72427   ,-4.5801177 ,-4.035709  , 2.5767221 ,-2.8133557 ,-1.8342617 , 3.5808434 ,-2.1022995 ,-3.5421894 ,-3.0776916 , 3.168665  ,-0.07246887,-1.2413273 , 4.7964606 ,-1.0624843 , 0.75939703, 2.5336463 ,-4.8622346 ,-4.9744167 , 2.1007512 , 1.5271608 , 0.37077245, 1.7765028 , 2.2724373 , 2.1864665 ,-0.37378153, 1.3559381 ,-1.4220421 ,-1.4756224 , 3.6143627 , 2.7846546 ,-2.5194893 , 3.005039  ,-3.6451447 ,-1.9118739 , 0.04718782,-3.0775185 ,-1.4801219 ,-2.35909   ,-0.4728799 , 4.610093  ,-4.472677  ,-4.530808  , 0.12514372, 0.05973044, 4.457302  , 3.1129916 , 3.6036162 , 4.5086145 ,-3.548999  , 0.4976606 ,-3.6525648 ,-2.1937015 ,-1.3205789 ,-2.6594079 , 4.415343  , 3.219482  ,-3.7286756 , 3.4116418 , 0.82889384,-3.0168123 , 4.382766  , 2.7633846 , 3.6949344 , 3.9806223 ,-0.6415279 ,-0.3193684 ,-1.3176754 ,-1.4990829 , 4.694691  ,-1.0581211 , 1.2103747 ,-0.26690048,-1.157015  ,-1.8951306 ,-0.8580171 ,-4.3080263 , 4.0737123 ,-1.2607352 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..50ed090
--- /dev/null
@@ -0,0 +1 @@
+ 4.9386005 , 3.7248888 , 3.3261378 , 4.8302746 ,-3.9337704 ,-4.2943096 , 0.16059242, 0.17785172,-2.4971933 ,-2.933359  ,-4.598231  , 4.7816315 ,-0.6563864 , 4.452592  , 1.8066075 , 3.1572745 , 4.500678  ,-1.1609873 ,-1.6962403 , 1.567031  ,-3.3120036 , 1.8150452 ,-2.7486987 ,-1.6800771 , 1.4895486 , 1.120401  , 1.4983965 , 4.7132416 , 0.39645562,-3.12486   ,-0.5966056 , 4.618641  , 1.225812  , 0.99017185, 3.9918585 , 1.299415  ,-1.2995726 , 4.202907  , 3.8657827 ,-4.0268126 ,-0.90370494, 0.5030568 ,-2.9651542 ,-4.1249614 ,-2.8990393 ,-4.1228724 ,-1.2640246 ,-0.72640723,-1.7128279 , 2.7710931 , 2.8189523 ,-0.8384207 , 0.71266395, 3.8393862 ,-1.7801509 ,-3.1485069 , 3.2076547 , 2.267659  ,-3.745656  ,-4.373508  , 0.86005193,-4.9145784 , 0.9253047 , 1.1243923 , 0.46507052, 1.9978004 ,-4.642887  ,-2.1898057 , 0.88199854,-2.1837327 , 1.1112527 ,-1.4548608 ,-3.5766103 ,-1.5607064 ,-3.630397  ,-1.9193211 ,-0.8931484 ,-0.2812017 ,-1.2881653 ,-2.5051243 ,-3.5648384 ,-0.5431733 ,-0.47036746,-2.8132265 ,-0.4302025 ,-4.003176  , 0.31743896,-3.074693  ,-3.3994603 , 0.62276137, 0.12920536,-2.5154057 ,-0.22098878,-2.711012  ,-0.303956  , 4.6025276 , 3.1887815 ,-0.50345755,-2.6543994 ,-0.8452558 ,-1.4075644 , 3.6716504 , 2.7388885 ,-4.9426928 , 3.5494354 , 4.777085  ,-3.3904083 ,-2.4746811 ,-2.943489  , 1.3607427 , 1.313449  ,-2.7959676 , 4.5932074 , 0.2460288 ,-1.1802251 , 0.6807028 ,-3.7335384 ,-0.30950046, 0.0558207 ,-4.7604976 ,-4.5745177 ,-3.3872643 ,-1.102581  ,-1.5612804 ,-1.2933319 , 4.5290637 ,-2.5096595 , 0.8673844 , 0.6069363 , 0.8294639 ,-0.05487671,-2.5923786 , 3.2974155 , 2.252853  ,-2.4157743 , 1.6614583 , 1.975577  ,-2.7390766 ,-0.26459846, 0.8946814 ,-3.257953  , 4.0526175 ,-1.5219783 , 4.6063023 ,-0.09599628, 3.2825923 , 2.0063279 ,-3.597641  ,-0.41604096,-2.5593333 , 1.8169669 ,-3.6998532 ,-2.3723404 , 0.4008657 , 2.1002467 , 4.9284163 , 4.6011457 ,-4.8977246 , 4.7852945 , 1.2170111 ,-1.055987  , 2.27575   , 1.0601226 ,-4.176826  , 0.08197393, 4.0421042 , 
3.6263971 , 2.6941037 ,-2.644993  , 0.10439859,-4.512112  , 3.7939842 ,-4.8532767 , 0.391317  , 3.6432517 ,-3.9992728 , 0.29700363, 1.2722415 ,-2.3793647 ,-3.377246  , 2.0930648 , 2.574604  ,-1.2509564 , 0.4457573 ,-0.46469867, 2.6793416 , 0.02566718,-0.11948132,-3.1046712 ,-0.6204446 ,-4.615342  , 4.057695  , 1.1312845 ,-3.0446556 ,-1.9381613 ,-0.92255247,-3.5459394 ,-1.1972907 , 0.5879403 ,-1.2265042 ,-2.6279037 , 3.7533212 ,-0.2950134 ,-1.6104454 , 4.7811155 , 3.9216835 ,-2.2905827 ,-3.9489107 ,-4.078132  , 4.878544  ,-2.1483154 ,-3.1480436 ,-1.8742744 , 0.38310575,-4.0457416 ,-1.5423136 , 4.9426446 , 2.80434   ,-2.758338  , 1.6596367 ,-4.559686  ,-1.2686385 ,-1.2173673 , 0.49475643,-2.4956207 ,-1.5008336 ,-1.7967415 ,-1.1574938 , 2.2852411 , 1.7171949 ,-3.328038  ,-3.1454384 ,-0.41883984, 3.822312  , 1.1161699 ,-1.5137968 , 3.1651397 , 3.2411747 , 1.2685378 , 2.7408757 ,-3.078621  , 3.3460293 ,-0.34918678,-1.0433053 , 0.9397743 ,-3.9071774 , 0.68924445, 4.896269  ,-4.234603  ,-4.8659916 , 1.472339  , 4.5464644 , 0.35857418, 3.4065645 ,-1.514736  , 4.2301235 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mean_000_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..163c037
--- /dev/null
@@ -0,0 +1 @@
+-0.91463715,-2.9258113 , 4.4465976 ,-0.84762925,-3.3510911 ,-0.15094744, 2.2284694 , 3.9705405 ,-1.6315348 , 4.698665  , 2.8595035 ,-2.4719086 , 4.2091336 ,-3.7003224 , 0.06198901, 4.24617   ,-3.7041452 , 1.4280707 , 0.61925036, 3.873551  , 0.3554166 , 3.0535998 ,-1.403015  , 2.5769274 , 4.0060935 ,-2.134697  , 0.61366636,-2.2069314 , 3.5629356 ,-4.94381   , 3.3054771 ,-0.42945656, 4.4868546 , 4.124087  ,-4.039486  , 0.75716823,-4.530404  ,-0.8464823 , 2.7817092 ,-4.954212  , 4.790015  , 2.5307322 , 0.635834  ,-3.393037  ,-3.7000508 ,-1.1439751 ,-2.4422479 , 3.9414582 ,-4.0586324 ,-3.5872777 , 2.2529798 , 0.50453144,-2.9947112 ,-0.76174486, 0.8427806 ,-0.90798455,-0.5518859 ,-1.1810572 , 1.2787138 ,-1.7791113 ,-4.661412  ,-3.7413049 , 0.03910514, 3.970302  ,-3.0697417 ,-4.107844  ,-1.985001  ,-2.434408  ,-3.0120797 , 0.34467867, 0.09826441, 3.1933572 , 0.09855966, 1.7976784 ,-3.3814316 ,-2.8423817 ,-4.787137  , 0.21746217,-1.8560363 ,-0.7145455 , 3.911294  , 4.6970305 ,-4.0105987 , 3.3843613 , 2.3087065 , 1.8619018 , 1.6607213 ,-4.1276345 ,-0.15251912, 3.1198032 , 1.8143575 , 2.178214  ,-4.6250186 , 4.4006424 ,-3.378407  , 3.6481302 , 4.4439235 , 4.5322957 , 2.7754776 , 1.9026359 ,-2.9371052 , 0.32501587, 4.980984  ,-3.2300677 , 4.190388  , 4.441369  , 0.8116277 ,-4.7056756 , 1.1501676 ,-0.9759702 ,-0.1920487 ,-3.2009268 , 4.654679  , 4.043145  , 4.579935  , 4.917842  ,-3.2166183 , 2.381046  , 2.3470554 , 0.04456256,-2.6785278 ,-2.1683002 ,-0.2686819 , 0.6097173 , 1.5071467 , 3.9692068 ,-3.4313831 ,-0.87708473, 3.9917011 , 0.7843428 ,-4.6622047 , 0.774621  ,-4.6538844 , 3.6392822 , 4.962988  , 1.4132729 ,-0.40482154,-1.8656421 ,-1.6113061 ,-1.3454957 , 0.40846685,-4.5410986 , 2.7158992 ,-1.8403106 ,-3.803351  , 4.406537  ,-1.5868717 , 2.7034876 ,-3.3383765 , 4.6084027 ,-1.691095  ,-0.52188784, 2.9010768 , 0.08786624, 2.7466853 ,-1.7457972 , 0.59371734,-0.1716976 ,-2.6220891 , 4.9432936 , 2.3500183 , 1.6905144 ,-2.7329378 , 4.003541  ,-1.1137847 , 3.9017355 , 
0.9116626 , 4.233729  ,-2.6706429 , 3.4342804 ,-0.42729262, 1.174779  ,-4.944099  , 1.2316282 , 4.9237943 ,-2.2999635 ,-4.9210916 ,-1.9033331 , 0.43241265, 3.2149148 , 4.1269703 , 0.8590868 , 2.734273  , 1.658618  ,-2.1702065 ,-2.0058317 , 4.0706363 , 4.003833  ,-0.35835287, 2.5514262 , 1.2571276 ,-4.655018  , 3.6468434 , 0.06320113,-4.662375  , 1.0745742 ,-1.117399  , 4.167245  , 4.59434   ,-1.686359  ,-0.17328739, 0.3083307 , 3.3926466 , 2.2254786 ,-0.45468137, 2.4956248 ,-3.492782  ,-2.9805465 ,-1.0610795 ,-0.2784433 , 0.7163735 ,-3.0048254 ,-1.8024784 ,-3.3139167 ,-1.8410577 , 4.5702477 ,-3.4454951 ,-1.4504164 ,-1.7432297 ,-4.998418  ,-2.5524495 , 3.028534  , 4.075326  ,-2.2187853 ,-0.6484594 , 3.00815   ,-2.8010397 ,-4.5529976 , 1.7830837 , 0.3373458 , 0.19151935,-1.0437245 ,-3.6349878 , 1.1947471 ,-1.9664146 , 0.27316815,-0.20781417, 2.419226  , 0.02246885, 4.5222287 , 3.1069999 , 3.940458  , 4.2710595 , 3.4216619 , 2.8447206 , 2.7136886 ,-0.60954016, 2.9277234 , 3.995615  ,-0.30593097, 1.7800944 , 1.0608315 , 3.8786283 ,-2.7564247 , 1.8526665 ,-3.8638606 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..e580d6f
--- /dev/null
@@ -0,0 +1 @@
+-4.024665 , 3.0544488,-4.5645285,-3.2134292,-2.1543078, 4.039755 ,-4.613908 , 4.2014904, 3.8222141,-4.4992657,-4.02681  ,-3.2933445
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..c593dfb
--- /dev/null
@@ -0,0 +1 @@
+-2.669042  , 2.479217  , 4.691815  , 1.8187722 ,-3.7656548 ,-2.0555806 ,-2.4494352 ,-3.2394514 ,-0.38215363,-1.543695  ,-0.6927158 , 2.3534324 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..14520a1
--- /dev/null
@@ -0,0 +1 @@
+ 4.036224  ,-1.2903051 , 1.2116423 , 3.92255   ,-0.48049024,-1.0290806 ,-0.9644837 , 1.3379688 ,-1.0027533 ,-1.9611529 , 3.7190473 , 0.45794436
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..2238d5e
--- /dev/null
@@ -0,0 +1 @@
+ 4.560488 ,-1.2475324, 1.8892838,-2.0155866,-4.968927 , 0.3717404,-0.6095849, 3.2483344,-1.2499679, 1.4237018,-3.1225715, 3.0611598
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..14a91cc
--- /dev/null
@@ -0,0 +1 @@
+-1.7167594, 2.116633 ,-1.3816848,-1.7106141,-3.273076 ,-4.148302 ,-2.1654181, 0.4368236, 3.4279666, 1.2954224, 1.3004405,-4.3022   
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..3b2a3c2
--- /dev/null
@@ -0,0 +1 @@
+ 4.9167333 , 0.9170983 ,-2.4031715 , 0.4819133 , 0.21536288,-2.0262568 , 4.364642  , 1.7851653 , 2.0982797 , 0.5736603 , 2.5769486 , 3.68285   
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..dff8a3b
--- /dev/null
@@ -0,0 +1 @@
+ 3.8708763 , 3.263454  ,-4.796817  , 0.6411522 ,-3.0385532 , 0.49334133,-0.20283684,-0.88814104, 4.826072  ,-4.8037696 , 4.757636  ,-3.036691  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..93e7472
--- /dev/null
@@ -0,0 +1 @@
+-3.8694625 ,-3.5254061 ,-0.23680535, 4.1042504 , 3.2534697 ,-1.8511593 ,-1.9182487 , 2.6457057 , 0.12923336, 2.618141  , 1.2465005 ,-4.4625525 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..c924e03
--- /dev/null
@@ -0,0 +1 @@
+-2.5559328 , 1.768443  ,-1.4850446 ,-1.2771453 ,-2.7216687 , 2.80077   , 0.21637216,-0.6145739 ,-0.37175298, 3.8750615 ,-1.9910356 ,-1.657059  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Mul_001_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..1153c85
--- /dev/null
@@ -0,0 +1 @@
+-1.6168976 ,-3.816399  ,-0.55625045, 4.961818  , 0.19316113,-2.6601286 ,-1.6928803 , 4.1208386 ,-1.4012221 , 2.7742999 , 0.75798005,-2.5877    
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..1f29932
--- /dev/null
@@ -0,0 +1 @@
+-3.3436873 ,-0.79453826, 2.2211137 , 2.6420908 ,-1.3191302 , 1.2973647 ,-4.506594  , 4.867371  ,-4.318404  , 1.6957753 ,-4.3091793 ,-3.2230556 , 4.9175825 ,-3.1527104 ,-2.6669753 ,-2.1135337 ,-3.7701926 ,-3.358504  ,-4.419803  , 3.2045574 ,-0.5828494 ,-3.5796826 ,-4.0088696 ,-4.7178082 , 2.2726505 , 2.1860175 , 3.7198956 ,-0.5788681 ,-3.7766652 ,-0.65016747, 3.707159  ,-2.240267  , 4.5772953 ,-0.54754776, 4.7143884 ,-3.196982  ,-3.6356654 , 3.7157805 , 3.1312432 , 0.58816016, 2.1710336 ,-1.600533  ,-3.689763  , 4.322089  , 0.4816874 , 2.2769346 ,-3.9072733 ,-0.58615017
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..a19ea66
--- /dev/null
@@ -0,0 +1 @@
+-1.275483  ,-3.6622071 ,-0.87433696, 0.60946655, 1.4415421 , 3.3705983 , 2.2635043 , 3.3926573 ,-0.2936643 ,-0.5169573 , 3.2535644 , 2.1269164 ,-3.4180303 , 1.0427854 ,-1.3514856 , 3.6084783 , 4.569944  ,-0.79272085, 2.9771423 ,-1.6668562 , 4.8700657 , 0.3355385 , 0.76509756, 3.5142152 ,-1.6743544 , 4.794434  ,-2.958765  ,-0.23857778, 2.4555902 , 2.459867  , 3.3922994 ,-4.350212  , 0.6286153 , 0.8139546 , 4.1676807 ,-3.3461437 , 0.69633776,-4.6548877 , 0.98267466,-4.508397  ,-1.4581255 ,-1.2289628 , 3.8701873 , 3.334336  ,-3.5611253 , 2.6133575 ,-1.0554558 ,-3.3291767 
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..7113eb5
--- /dev/null
@@ -0,0 +1 @@
+-0.6250365 ,-4.798417  ,-4.214081  ,-3.625409  , 2.4391694 , 4.1856265 , 3.2472587 ,-3.20996   ,-2.3537548 , 1.3749354 , 2.5947835 ,-1.8891864 ,-3.612735  , 2.246563  , 1.2701501 ,-2.8927476 ,-0.71078295,-3.6037376 ,-4.5916877 , 2.0044398 , 3.4437728 ,-1.0695096 , 4.3483944 ,-3.3387017 ,-0.9384242 , 1.4229002 ,-0.6568144 , 1.1164346 , 1.7145283 ,-2.596518  , 4.6728883 , 3.4737296 , 1.7935314 , 3.1263895 , 1.3614839 ,-3.824968  ,-3.0405738 , 3.1729462 ,-4.1985774 ,-2.9489865 ,-4.2080064 , 2.0368521 ,-2.858539  ,-0.03206728,-1.1123812 , 0.2994737 , 1.6906137 ,-0.8665008 
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..afeb2c0
--- /dev/null
@@ -0,0 +1 @@
+-4.5279946 ,-3.4497826 ,-2.058617  ,-0.39549035,-0.26672208, 3.0173857 , 3.2430282 , 1.9996022 , 1.3895315 , 1.7620904 ,-4.9040093 ,-3.2858686 ,-2.2823575 ,-1.4176623 ,-0.537347  , 0.68219584,-3.193989  ,-3.1675165 , 0.47214374,-4.390378  ,-1.8730192 , 1.4416525 ,-3.0460286 ,-0.73547626, 1.8686327 ,-0.8146671 ,-2.0906649 , 0.01226121,-0.06992937, 0.9302521 ,-2.1858516 , 4.8370657 ,-4.1847024 , 4.4963436 ,-1.3834711 ,-1.1244944 , 0.4290957 ,-4.2681174 , 1.2978764 , 3.4149706 ,-2.7011304 ,-3.1285405 ,-3.8857136 ,-0.18625297,-0.13618916, 2.427405  ,-1.7979074 ,-1.4174187 
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..99c6284
--- /dev/null
@@ -0,0 +1 @@
+-0.40635094,-2.485209  ,-2.9641154 , 4.09174   ,-1.9137962 ,-2.0860991 , 1.6594787 , 0.53744185, 1.7737653 ,-1.7054961 , 2.5611186 ,-1.1456238 , 2.741241  ,-2.283051  ,-4.2111306 ,-0.8722772 , 1.6465468 ,-0.61518955, 0.08495517, 3.6847656 , 3.7826371 , 2.0023444 ,-3.5326133 , 2.3723035 , 3.7383325 ,-3.3514297 , 2.031452  ,-0.7364658 ,-4.3347225 ,-2.8146286 ,-1.37377   ,-3.518721  ,-0.19657679,-1.6831368 , 1.2457223 , 0.25099897,-4.4722757 ,-4.135197  ,-0.6378818 , 3.8833187 , 1.9291897 , 2.5969315 , 2.146067  ,-2.846719  ,-2.2562532 ,-2.6856182 , 2.824374  , 2.3662992 
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..081a1e6
--- /dev/null
@@ -0,0 +1 @@
+-1.9927613e+00,-1.7386111e+00, 4.0895696e+00, 3.7818990e+00, 1.9420158e+00, 2.8482721e+00, 1.9165717e+00, 3.0059583e+00, 1.8346788e+00,-1.9055414e-03, 4.9277787e+00,-2.2794118e+00, 4.4005270e+00, 4.9703922e+00,-4.5275192e+00,-4.0446317e-01,-4.9363256e+00, 4.9506269e+00, 5.5874938e-01, 3.9949589e+00,-3.8152415e-01,-4.1024357e-01,-3.8472393e+00, 4.2956004e+00, 4.8097472e+00, 1.7960385e+00, 1.6767026e+00,-2.2773645e+00, 2.6808765e+00,-3.7214172e+00, 4.0978761e+00, 3.6202488e+00,-3.3211513e+00, 3.6200387e+00,-3.6106458e+00,-3.9778764e+00, 3.8779631e+00,-4.8502750e+00,-2.1901150e+00, 3.1800017e+00, 4.6261444e+00, 3.5151103e+00, 2.8659137e-02, 4.5340648e+00, 1.9836371e+00,-2.1751235e+00,-4.6762753e+00,-3.6951694e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..f6b31db
--- /dev/null
@@ -0,0 +1 @@
+-4.7488093 , 4.805902  ,-0.29828382, 0.57486725,-4.864297  , 1.1832287 ,-1.7611881 ,-2.7058024 , 2.707353  ,-3.9832466 , 3.1243927 ,-4.795229  , 1.9835415 , 3.2291937 , 2.4303932 ,-3.556881  , 4.316894  ,-0.6444627 ,-3.8289468 , 4.012964  , 0.7878584 ,-1.8921386 , 2.779619  ,-3.762597  , 3.4239094 ,-0.9103423 ,-3.9791772 ,-2.5613685 ,-4.4910364 , 0.19411987, 4.6296096 ,-0.6827259 , 3.7645729 , 1.5309091 , 3.5163064 , 3.4726381 , 3.5372822 , 1.7671971 , 1.4374614 , 3.5783768 ,-2.4927518 , 3.9427729 , 2.431568  , 2.6959393 , 3.8100271 ,-2.099064  , 3.3663592 ,-2.0818436 
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..acc01cb
--- /dev/null
@@ -0,0 +1 @@
+ 4.279912  ,-2.2746763 , 4.0609813 , 4.5353827 , 3.624241  ,-3.9593613 , 4.189409  ,-3.9370356 ,-2.7063863 ,-1.9987059 , 4.172294  ,-4.5454354 , 4.362368  , 2.2204642 ,-4.9866576 , 3.31571   , 0.12623785, 4.7834573 ,-1.3521448 ,-1.5408021 ,-4.6578984 ,-2.93307   ,-1.5684534 ,-1.6875995 ,-0.4278419 , 1.1314197 ,-2.9655704 ,-0.48032767,-1.9200082 , 1.3321692 , 0.87586147,-0.1761448 , 3.939337  ,-1.0270193 ,-4.807054  , 2.8373904 ,-1.1184337 ,-0.8979197 , 2.1442132 ,-2.8509672 ,-3.3741531 , 3.6592414 , 0.7632272 ,-4.11465   , 4.892313  , 4.715815  ,-4.6481915 , 0.24676175
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..0f0b7a9
--- /dev/null
@@ -0,0 +1 @@
+-2.0949495 ,-1.1370499 , 4.6457314 ,-2.243915  ,-1.7996464 , 1.2268789 ,-4.938172  ,-3.2802615 , 1.8788282 , 4.4162655 ,-4.8805113 , 3.1269526 , 3.2644348 , 0.89842725,-1.4484432 ,-0.28381723, 3.046261  ,-1.0718596 ,-3.996107  ,-4.9575796 ,-2.2279077 , 1.5326967 , 4.4588428 ,-2.042381  , 4.6604958 , 4.6422915 ,-1.097833  , 3.666126  , 0.4735639 ,-4.480704  ,-4.831033  ,-0.27288163, 4.588138  , 4.5297036 , 4.3675694 ,-1.6098841 ,-3.4147859 , 2.1168516 ,-1.9529305 ,-0.12548867, 3.4388335 ,-1.4071734 , 0.9507897 , 4.8206787 , 1.676873  ,-1.7102181 , 1.7746873 , 0.02711739
diff --git a/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/PRelu_001_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..d23450d
--- /dev/null
@@ -0,0 +1 @@
+-4.707647  ,-4.0921726 , 3.5813692 ,-4.71081   , 3.157816  ,-3.0034213 ,-0.21858999,-1.1736552 ,-1.6042249 ,-3.93102   ,-4.0407577 , 3.7350774 ,-4.9545655 ,-1.5413756 , 0.34996858, 2.0339615 , 0.99290746,-3.9916334 ,-4.149016  ,-3.2332835 , 3.6728513 , 2.4537466 ,-3.103485  ,-0.4829316 , 4.8046784 ,-1.753812  , 4.878712  ,-1.4039769 , 1.6640003 ,-1.2041731 , 0.8046477 , 0.9196048 ,-0.6475092 , 1.1409346 , 2.0324717 ,-0.04227797,-0.5379897 , 3.205104  , 3.3556423 , 4.8447986 ,-1.9695646 ,-2.6304977 ,-3.7261262 ,-4.725599  , 2.1162436 ,-0.5631174 ,-0.5820323 , 0.8398242 
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..eb058a1
--- /dev/null
@@ -0,0 +1 @@
+-0.55411166,-4.1992335 , 1.4317423 ,-3.7261302 , 1.151971  ,-2.117022  ,-0.7386241 , 4.654951  , 1.4869142 ,-4.6252975 ,-3.305923  , 3.632628  ,-2.6403873 ,-4.862389  , 3.477561  ,-4.9842925 ,-3.6267536 , 4.9950438 
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..ff15f03
--- /dev/null
@@ -0,0 +1 @@
+ 0.18094282,-0.58095986, 1.2765085 ,-0.534363  , 4.5564513 ,-0.28305855, 0.80606604,-3.3217795 ,-0.08041744,-3.7558215 ,-0.5370528 , 1.8984528 ,-0.09462419,-0.28595117, 4.6817894 ,-4.6653147 ,-4.127137  ,-2.3407753 
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..e564168
--- /dev/null
@@ -0,0 +1 @@
+-0.62747055, 1.4133646 ,-0.9954612 ,-4.687624  ,-2.5390003 ,-4.534569  ,-1.1943612 ,-4.830596  , 4.3214984 ,-2.4795794 , 4.166298  ,-1.4772589 ,-4.074577  , 3.2332711 ,-1.5221404 ,-1.7308865 , 0.06814837, 2.944668  
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..c763b63
--- /dev/null
@@ -0,0 +1 @@
+-3.2136867 , 0.6229863 , 0.02772082,-0.00820862,-2.4893622 ,-0.6757174 ,-2.2024722 ,-2.0893583 , 0.33953062,-3.5438979 , 0.7000838 , 1.3219849 ,-0.02302017, 2.3125873 ,-1.5376673 ,-4.0330076 , 4.755884  , 2.729685  
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..12e1327
--- /dev/null
@@ -0,0 +1 @@
+ 0.82922786, 4.762074  ,-3.5043278 , 2.4521468 , 2.6450796 ,-2.8606322 , 0.8321993 ,-1.4020495 ,-0.25749585, 1.0287803 ,-3.911455  ,-1.8311876 , 2.763438  , 3.8604703 ,-3.5478592 ,-4.2335987 ,-3.6402035 ,-1.8485361 
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..42ce6be
--- /dev/null
@@ -0,0 +1 @@
+ 1.1826919 , 0.07451724, 3.48515   , 3.4905832 , 1.8009655 , 4.155749  , 3.3155255 , 2.6834202 ,-1.7111781 ,-2.2254407 ,-4.578932  ,-2.1239302 ,-0.1269101 ,-2.6022012 ,-4.8320093 , 0.2983099 ,-0.43314072,-0.66332716
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..f677cc8
--- /dev/null
@@ -0,0 +1 @@
+-1.2971772 ,-3.6082    ,-2.2253058 ,-4.4367466 ,-1.7221912 , 0.02547262,-3.641017  , 0.2953748 , 0.7217547 , 4.663728  , 4.262444  ,-3.196005  ,-1.6792587 ,-1.7463406 , 2.030074  , 0.67998594,-0.92862725,-1.7960806 
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..841ea9f
--- /dev/null
@@ -0,0 +1 @@
+ 2.2390285 ,-1.9557759 ,-1.2331479 ,-2.4810686 ,-0.5112022 , 1.741153  , 0.13645513,-2.3543327 ,-3.2610211 , 2.5739572 ,-0.50510126, 2.3544457 , 1.884411  ,-3.7153857 ,-1.7037194 ,-0.36849263,-4.819704  , 3.047652  
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..08ec9fe
--- /dev/null
@@ -0,0 +1 @@
+-0.9080747 ,-1.5609599 ,-0.40923035,-2.0569193 , 4.5904484 ,-0.02348744, 0.35939455, 2.2017193 , 2.2766497 ,-2.2080436 ,-2.6453862 ,-3.6456985 , 4.160244  , 1.7283534 , 4.5547447 ,-1.8674839 , 3.019465  , 1.1584582 
diff --git a/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/ReLU_000_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..a4f2d97
--- /dev/null
@@ -0,0 +1 @@
+ 4.5920744 , 3.827386  ,-2.1228654 , 3.7227573 ,-3.4464717 , 0.31313375, 0.5531476 ,-0.30391756,-0.21601346, 3.8968146 , 0.23224053,-0.6208954 ,-0.76323295,-1.1700501 ,-1.6203161 , 2.1780837 , 2.3581395 , 2.6519518 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..0e8d687
--- /dev/null
@@ -0,0 +1 @@
+-2.327701  , 1.9312059 ,-2.0069487 ,-1.2584914 ,-0.08435626, 0.47685367,-2.7456024 , 2.1275337 ,-4.9685698 , 1.8143541 , 0.52829266,-2.770121  
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..67732e8
--- /dev/null
@@ -0,0 +1 @@
+ 0.01133719,-3.3741624 , 3.556686  ,-4.21059   , 0.49977505, 1.768375  , 3.867543  , 2.270572  ,-3.9507272 ,-4.595618  ,-4.7460327 , 0.5856542 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..7bc7124
--- /dev/null
@@ -0,0 +1 @@
+-2.7181    , 4.6819983 , 2.9022477 ,-0.10716935, 3.6687856 ,-2.5403244 ,-4.477037  , 2.5499978 ,-3.9294813 , 0.08725335,-2.243345  ,-1.4018577 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..0fac9fb
--- /dev/null
@@ -0,0 +1 @@
+-3.920553  , 0.87464577,-1.0319884 , 2.1885726 , 2.755115  ,-1.6436632 ,-4.4507327 , 4.915525  , 2.9331517 , 4.7712016 , 4.676084  ,-1.7715888 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..df79104
--- /dev/null
@@ -0,0 +1 @@
+-2.181168  ,-1.6011912 ,-4.359466  ,-1.3662407 ,-0.06876431,-2.9213328 ,-0.5463467 ,-3.7916536 ,-3.751455  ,-2.822578  , 0.8914152 ,-3.0267959 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/0.txt
new file mode 100644 (file)
index 0000000..4b999a0
--- /dev/null
@@ -0,0 +1 @@
+ 3.241328  , 2.7033713 ,-2.5329788 ,-4.078369  ,-3.6711028 , 2.8912613 , 0.6188993 , 3.3729403 , 2.9906578 , 0.69040877, 0.6443222 , 1.1676162 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/1.txt
new file mode 100644 (file)
index 0000000..7061063
--- /dev/null
@@ -0,0 +1 @@
+ 1.572614  , 3.6147017 , 1.4378501 ,-0.81497866, 1.5987366 , 3.7698908 ,-3.8637109 , 4.5728784 ,-0.8706349 , 0.7389268 , 4.64117   ,-0.96047217
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/2.txt
new file mode 100644 (file)
index 0000000..c048a8a
--- /dev/null
@@ -0,0 +1 @@
+ 0.00864919,-3.1653113 ,-2.125551  , 2.9225516 ,-1.1439148 , 4.6509814 ,-2.097259  , 2.5843353 ,-2.067207  ,-2.5034845 ,-4.9441104 ,-3.9062042 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/3.txt
new file mode 100644 (file)
index 0000000..55be3b4
--- /dev/null
@@ -0,0 +1 @@
+ 1.0920542 , 0.5510192 , 1.3465579 ,-2.3510268 , 4.016736  , 4.7848744 ,-0.42403316, 0.00571597, 1.6412207 , 1.7787368 , 2.4728034 ,-3.5900247 
diff --git a/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/Split_000_config/channel/uint8/4.txt
new file mode 100644 (file)
index 0000000..04c7a1a
--- /dev/null
@@ -0,0 +1 @@
+-2.9799085,-3.9477375, 0.6402844, 3.304766 , 3.8880465,-3.5069442,-2.3702915, 4.126247 ,-3.1614416, 2.9909244,-2.8755414, 0.2627986
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/0.txt
new file mode 100644 (file)
index 0000000..e9db48f
--- /dev/null
@@ -0,0 +1 @@
+-1.4124781 , 0.42694193, 1.1734594 ,-3.5111153 ,-2.9756174 , 1.3682148 ,-2.318465  , 2.198896  ,-4.5043235 , 3.1775594 ,-0.42802384,-1.4872279 , 1.3821319 ,-4.771963  ,-0.12837897, 4.132799  , 3.697655  , 2.0807178 ,-3.621293  , 2.121878  ,-0.25654107, 0.42100102,-1.4009671 ,-2.9733627 ,-0.7058871 ,-2.831215  , 3.5669627 , 2.1420689 ,-1.8789555 , 0.8104939 ,-2.0503597 , 1.7788508 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/1.txt
new file mode 100644 (file)
index 0000000..479d062
--- /dev/null
@@ -0,0 +1 @@
+ 3.4726453 , 3.0497985 ,-4.234619  ,-1.0526706 , 1.7278554 ,-3.341614  , 4.54768   , 3.0954597 ,-3.735109  , 2.8810751 ,-2.5381427 ,-3.2360535 ,-1.5378917 , 2.3052745 ,-3.170938  ,-3.327242  , 2.0654576 ,-2.2294598 ,-1.881382  , 0.13216451,-4.2825613 , 0.26616526, 4.6196365 ,-0.88623226, 1.7103885 ,-1.5865034 ,-3.9114466 ,-3.2227128 , 4.909618  , 2.3318915 , 0.84300846, 0.760918  
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/2.txt
new file mode 100644 (file)
index 0000000..ae28234
--- /dev/null
@@ -0,0 +1 @@
+-4.6097918,-4.21991  ,-3.9955974, 3.6492047, 2.9191775, 2.8082933, 1.6189331, 0.2730309,-1.5029653,-1.9471445, 4.8758197, 3.3177438, 3.1338058,-2.1281245,-1.7526287,-2.5518703,-1.7746793, 4.0455256,-0.5839861,-4.408046 ,-4.0034447, 1.5858272,-4.5896654, 4.7211285,-4.677515 ,-2.6027086,-4.7896166,-3.5512326,-1.9068764,-2.9705904,-4.854087 ,-4.892111 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/3.txt
new file mode 100644 (file)
index 0000000..fd40f84
--- /dev/null
@@ -0,0 +1 @@
+ 2.1514777e-02, 2.6526773e+00,-3.0477784e+00, 1.3287724e+00,-4.1414630e-01,-1.7295350e-01, 7.6649576e-01,-1.8028022e+00,-7.0781744e-01,-2.5262204e-01,-3.0970418e+00,-1.3165286e+00,-4.6649928e+00, 2.0809033e+00,-1.5739973e+00,-4.0531826e-01,-2.1718202e+00, 2.0146034e+00, 2.5044403e+00,-1.1256610e+00, 1.3536702e+00, 1.0283234e-03,-1.8823910e+00, 4.7122188e+00, 9.4781297e-01, 3.2012525e+00,-5.5164534e-01,-2.6158772e+00,-1.8771547e+00,-3.1689723e+00, 4.9054880e+00,-3.4560370e+00
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/channel/int16/4.txt
new file mode 100644 (file)
index 0000000..e81c3b8
--- /dev/null
@@ -0,0 +1 @@
+-2.0927553 ,-2.107511  ,-1.6963564 , 1.7006218 , 1.4575784 , 0.06095728, 1.2659966 , 4.1905265 , 1.3035946 , 4.9793477 ,-4.3388166 ,-0.23496658, 1.9831208 , 2.6154642 ,-0.2790228 ,-3.1774354 ,-3.178935  ,-1.1564373 ,-0.8199472 ,-2.245698  ,-4.8605046 ,-3.569018  ,-1.4226891 ,-4.1067843 , 2.6078918 ,-3.5830674 , 1.9065963 , 2.435578  ,-3.3216476 , 4.5930347 , 2.9191844 , 1.7885648 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/0.txt
new file mode 100644 (file)
index 0000000..a8874bc
--- /dev/null
@@ -0,0 +1 @@
+ 3.9384239 ,-3.7377489 , 0.97284186, 3.8309984 , 2.4125865 , 1.7141674 , 3.9459977 ,-0.304659  ,-3.4623327 , 4.4569106 , 4.209985  ,-0.6677348 , 3.4578135 , 1.6779743 , 2.502791  ,-1.324285  , 1.3139176 , 3.4334664 ,-2.2695086 ,-4.001059  ,-0.91164917, 4.4447775 ,-3.0275404 ,-2.0852396 , 3.6677403 ,-2.9595146 , 2.0921555 , 1.7570637 , 3.717391  ,-0.3216191 ,-0.8410847 , 2.662336  
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/1.txt
new file mode 100644 (file)
index 0000000..715e680
--- /dev/null
@@ -0,0 +1 @@
+ 0.6663157 ,-0.04146723,-0.8193995 , 4.804576  ,-2.1357434 , 4.0829    ,-1.6380692 , 1.8043218 , 2.3431025 , 0.30111   , 1.2928191 ,-1.8559257 ,-0.68305963,-1.1502715 , 1.9492546 ,-2.7240746 , 2.9279857 ,-3.3329778 ,-4.8343406 ,-0.02708206, 1.1840513 , 3.6476028 , 4.75276   ,-4.9085226 ,-1.1922491 , 0.54225117, 3.17247   ,-2.7856457 ,-3.0866194 ,-2.2077718 , 1.6263398 , 3.7066603 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/2.txt
new file mode 100644 (file)
index 0000000..3ca893e
--- /dev/null
@@ -0,0 +1 @@
+-4.8507566 ,-1.267258  , 0.5099198 , 1.650726  , 3.4329638 ,-2.2652836 , 1.2157568 , 0.18305123, 3.6754217 ,-4.6185255 ,-1.0646905 ,-0.46092424, 2.046326  ,-2.8830478 , 4.156068  ,-2.0503244 , 0.0755459 ,-4.6472006 ,-0.50128895, 3.1129324 ,-4.4048553 , 0.47983927, 1.4510479 , 3.9226127 ,-4.767221  ,-2.795826  ,-4.816457  ,-3.6127663 ,-2.2712553 , 4.586938  , 1.1028811 , 1.5028698 
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/3.txt
new file mode 100644 (file)
index 0000000..3fba8ec
--- /dev/null
@@ -0,0 +1 @@
+ 4.9431224 ,-3.4878132 ,-2.4831018 , 2.2395666 ,-2.3317611 ,-1.6786547 ,-2.4702384 , 3.2167027 , 1.7300137 , 2.8848834 ,-4.6395254 , 0.5527259 ,-2.915835  ,-1.0066313 ,-0.278253  , 4.6136203 ,-3.4183645 ,-1.5189631 ,-4.599058  , 3.3198457 ,-3.9464161 ,-0.6357558 , 0.32550323, 3.2147424 , 4.921844  ,-0.30067012, 3.9456701 , 0.5943688 ,-4.7229166 ,-3.6803844 ,-3.3813965 , 3.283583  
diff --git a/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt b/compiler/pota-quantization-value-test/test_inputs/TransposeConv_001_config/layer/uint8/4.txt
new file mode 100644 (file)
index 0000000..16cc23b
--- /dev/null
@@ -0,0 +1 @@
+ 2.232644  , 4.465217  , 1.926956  ,-4.007337  ,-2.7392106 ,-2.4579394 , 2.913538  ,-1.7261469 , 3.8706868 , 0.06259949,-2.018361  , 1.2728635 ,-3.133289  ,-4.943454  ,-1.5415367 ,-4.8183494 , 4.348317  ,-2.4929109 ,-0.9018388 ,-4.776565  , 4.634248  , 3.0753953 , 2.3412373 ,-2.7086196 , 3.4485948 , 0.3561932 , 0.03650501,-2.8704169 , 1.0514414 , 3.3964615 , 1.2783849 , 4.974951  
diff --git a/compiler/pota-quantization-value-test/test_quantization_with_config.sh b/compiler/pota-quantization-value-test/test_quantization_with_config.sh
new file mode 100755 (executable)
index 0000000..1364dfb
--- /dev/null
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# This script tests quantize_with_minmax option of circle-quantizer with config file
+#
+# HOW TO USE
+#
+# ./test_quantization_with_config.sh <path/to/test.config> <path/to/work_dir> <TEST 1> <TEST 2> ...
+# test.config : set ${RECORD_MINMAX_PATH} and ${CIRCLE_QUANTIZER_PATH}
+# work_dir : build directory of quantization-value-test (ex: build/compiler/quantization-value-test)
+
+SOURCE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+COMPARE_SCRIPT_PATH="${SOURCE_PATH}/compare_tensors.py"
+CONFIG_PATH="$1"; shift
+BIN_PATH=$(dirname "${CONFIG_PATH}")
+TEST_INPUT_PATH="${SOURCE_PATH}/test_inputs"
+GEN_SCRIPT_PATH="${BIN_PATH}/gen_h5_explicit_inputs.py"
+WORKDIR="$1"; shift
+
+source "${CONFIG_PATH}"
+
+echo "-- Found CIRCLE_QUANTIZER: ${CIRCLE_QUANTIZER_PATH}"
+echo "-- Found CIRCLE_TENSORDUMP: ${CIRCLE_TENSORDUMP_PATH}"
+echo "-- Found workdir: ${WORKDIR}"
+
+TESTED=()
+PASSED=()
+FAILED=()
+
+pushd "${WORKDIR}"
+while [ "$1" != "" ]; do  
+  MODELNAME=$1; shift
+  GRANULARITY=$1; shift
+  DTYPE=$1; shift
+  TESTCASE="${MODELNAME}.${GRANULARITY}.${DTYPE}"
+
+  TESTED+=("${TESTCASE}")
+
+  TESTCASE_FILE="${WORKDIR}/${TESTCASE}"
+  TEST_RESULT_FILE="${BIN_PATH}/${TESTCASE}"
+
+  PASSED_TAG="${TEST_RESULT_FILE}.quantization.mixed.passed"
+  rm -f "${PASSED_TAG}"
+
+  cat > "${TEST_RESULT_FILE}_quantization_with_config.log" <(
+    exec 2>&1
+    set -ex
+
+    # Generate h5 input data
+    source "${VIRTUALENV}/bin/activate"
+    "${VIRTUALENV}/bin/python" "${GEN_SCRIPT_PATH}" \
+      --model "${WORKDIR}/${MODELNAME}.circle" \
+      --input "${TEST_INPUT_PATH}/${MODELNAME}_config/${GRANULARITY}/${DTYPE}" \
+      --output "${TESTCASE_FILE}.mixed.input.h5"
+
+    if [[ $? -ne 0 ]]; then
+      echo "FAILED TO GENERATE INPUT"
+      continue
+    fi
+
+    # Run record-minmax
+    # NOTE There is no '_with_config' test for record-minmax, because it does not
+    # use quantization config file.
+    "${RECORD_MINMAX_PATH}" \
+      --input_model "${TEST_RESULT_FILE}.fake_quantized.mixed.circle" \
+      --input_data "${TESTCASE_FILE}.mixed.input.h5" \
+      --output_model "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle" 
+
+    # Run circle-quantizer with --quantize_with_minmax
+    "${CIRCLE_QUANTIZER_PATH}" \
+      --quantize_with_minmax float32 "${DTYPE}" "${GRANULARITY}" \
+      --config "${SOURCE_PATH}/config_files/${MODELNAME}/${GRANULARITY}/${DTYPE}/qconf.json" \
+      "${TEST_RESULT_FILE}.minmax_recorded.mixed.circle" \
+      "${TEST_RESULT_FILE}.quantized.mixed.circle" 
+
+    # Dump scale, zp, weights values (circle-tensordump)
+    "${CIRCLE_TENSORDUMP_PATH}" \
+      "${TEST_RESULT_FILE}.quantized.mixed.circle" \
+      --tensors_to_hdf5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5"
+
+    # Compare result
+    "${VIRTUALENV}/bin/python" "${COMPARE_SCRIPT_PATH}" \
+      --input_h5 "${TEST_RESULT_FILE}.quantized.mixed.circle.h5" \
+      --expect_dir "${SOURCE_PATH}/expected_outputs/${MODELNAME}_config/${GRANULARITY}/${DTYPE}/quantization" \
+      --mode quantization
+
+    if [[ $? -eq 0 ]]; then
+      touch "${PASSED_TAG}"
+    fi
+  )
+
+  if [[ -f "${PASSED_TAG}" ]]; then
+    PASSED+=("$TESTCASE")
+  else
+    FAILED+=("$TESTCASE")
+  fi
+done
+popd
+
+if [[ ${#TESTED[@]} -ne ${#PASSED[@]} ]]; then
+  echo "FAILED"
+  for TEST in "${FAILED[@]}"
+  do
+    echo "- ${TEST}"
+  done
+  exit 255
+fi
+
+echo "PASSED"
+exit 0
index 6d58458ca194c8fff894ef6fb0a0c93bc4ad403a..1db09cb883441502ed931d63cb7ad6bfa03c4ec5 100644 (file)
@@ -3,7 +3,9 @@ file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
 add_library(pp STATIC ${SOURCES})
-set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+  set_target_properties(pp PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
 target_include_directories(pp PUBLIC include)
 target_link_libraries(pp PRIVATE nncc_common)
 target_link_libraries(pp PUBLIC nncc_coverage)
index 2221e1702c50464c1b9bdbf33e24b67070077d40..31b9061423249284eb0ae4cec58bf20735f6d4c1 100644 (file)
@@ -37,6 +37,6 @@ add_test(
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/testall.sh"
           "${TEST_CONFIG}"
           "${ARTIFACTS_BIN_PATH}"
-          "${NNCC_OVERLAY_DIR}/venv_1_13_2"
+          "${NNCC_OVERLAY_DIR}/venv_2_8_0"
           ${RECORD_MINMAX_CONVERSION_TEST}
 )
index da63bbf5f82400de54027b0202469bd75a3acc0e..b9c08f47298ce007da5261b7b4a3309d4c2ca110 100644 (file)
@@ -1,25 +1,17 @@
-nnas_find_package(HDF5 COMPONENTS STATIC QUIET)
-
-if(NOT HDF5_FOUND)
-  message(STATUS "Build record-minmax: FAILED (missing HDF5)")
-  return()
-endif(NOT HDF5_FOUND)
-
 set(DRIVER "driver/Driver.cpp")
 
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
 add_executable(record-minmax ${DRIVER} ${SOURCES})
 target_include_directories(record-minmax PRIVATE include)
-target_include_directories(record-minmax PRIVATE ${HDF5_INCLUDE_DIRS})
 
-target_link_libraries(record-minmax ${HDF5_CXX_LIBRARIES})
 target_link_libraries(record-minmax arser)
 target_link_libraries(record-minmax safemain)
 target_link_libraries(record-minmax luci_import)
 target_link_libraries(record-minmax luci_env)
 target_link_libraries(record-minmax luci_export)
 target_link_libraries(record-minmax luci_interpreter)
+target_link_libraries(record-minmax dio_hdf5)
 target_link_libraries(record-minmax vconone)
 target_link_libraries(record-minmax nncc_coverage)
 
index 9cf12591e1e8e6b77aee78ede39099d3f947c4b0..69373e76f699d27db05a8c5bafd6b3425bca95a0 100644 (file)
@@ -2,4 +2,5 @@ require("luci")
 require("luci-interpreter")
 require("safemain")
 require("arser")
+require("dio-hdf5")
 require("vconone")
diff --git a/compiler/record-minmax/src/HDF5Importer.cpp b/compiler/record-minmax/src/HDF5Importer.cpp
deleted file mode 100644 (file)
index cfb270c..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "HDF5Importer.h"
-
-#include <H5Cpp.h>
-
-#include <string>
-#include <cassert>
-#include <stdexcept>
-
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
-
-namespace
-{
-
-Shape toInternalShape(const H5::DataSpace &dataspace)
-{
-  int rank = dataspace.getSimpleExtentNdims();
-
-  std::vector<hsize_t> dims;
-  dims.resize(rank, 0);
-  dataspace.getSimpleExtentDims(dims.data());
-
-  Shape res(rank);
-  for (int axis = 0; axis < rank; ++axis)
-  {
-    res.dim(axis) = dims[axis];
-  }
-
-  return res;
-}
-
-DataType toInternalDtype(const H5::DataType &h5_type)
-{
-  if (h5_type == H5::PredType::IEEE_F32BE || h5_type == H5::PredType::IEEE_F32LE)
-  {
-    return DataType::FLOAT32;
-  }
-  if (h5_type == H5::PredType::STD_I32BE || h5_type == H5::PredType::STD_I32LE)
-  {
-    return DataType::S32;
-  }
-  if (h5_type == H5::PredType::STD_I64BE || h5_type == H5::PredType::STD_I64LE)
-  {
-    return DataType::S64;
-  }
-  if (h5_type.getClass() == H5T_class_t::H5T_ENUM)
-  {
-    // We follow the numpy format
-    // In numpy 1.19.0, np.bool_ is saved as H5T_ENUM
-    // - (name, value) -> (FALSE, 0) and (TRUE, 1)
-    // - value dtype is H5T_STD_I8LE
-    // TODO Find a general way to recognize BOOL type
-    char name[10];
-    int8_t value[2] = {0, 1};
-    if (H5Tenum_nameof(h5_type.getId(), value, name, 10) < 0)
-      return DataType::Unknown;
-
-    if (std::string(name) != "FALSE")
-      return DataType::Unknown;
-
-    if (H5Tenum_nameof(h5_type.getId(), value + 1, name, 10) < 0)
-      return DataType::Unknown;
-
-    if (std::string(name) != "TRUE")
-      return DataType::Unknown;
-
-    return DataType::BOOL;
-  }
-  // TODO Support more datatypes
-  return DataType::Unknown;
-}
-
-void readTensorData(H5::DataSet &tensor, uint8_t *buffer)
-{
-  tensor.read(buffer, H5::PredType::NATIVE_UINT8);
-}
-
-void readTensorData(H5::DataSet &tensor, float *buffer)
-{
-  tensor.read(buffer, H5::PredType::NATIVE_FLOAT);
-}
-
-void readTensorData(H5::DataSet &tensor, int32_t *buffer)
-{
-  tensor.read(buffer, H5::PredType::NATIVE_INT);
-}
-
-void readTensorData(H5::DataSet &tensor, int64_t *buffer)
-{
-  tensor.read(buffer, H5::PredType::NATIVE_LONG);
-}
-
-} // namespace
-
-namespace record_minmax
-{
-
-int32_t HDF5Importer::numInputs(int32_t record_idx)
-{
-  auto records = _value_grp.openGroup(std::to_string(record_idx));
-  return records.getNumObjs();
-}
-
-void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, void *buffer)
-{
-  auto record = _value_grp.openGroup(std::to_string(record_idx));
-  auto tensor = record.openDataSet(std::to_string(input_idx));
-
-  readTensorData(tensor, static_cast<uint8_t *>(buffer));
-}
-
-void HDF5Importer::readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
-                              void *buffer)
-{
-  auto record = _value_grp.openGroup(std::to_string(record_idx));
-  auto tensor = record.openDataSet(std::to_string(input_idx));
-
-  auto tensor_dtype = tensor.getDataType();
-  *dtype = toInternalDtype(tensor_dtype);
-
-  auto tensor_shape = tensor.getSpace();
-  *shape = toInternalShape(tensor_shape);
-
-  switch (*dtype)
-  {
-    case DataType::FLOAT32:
-      readTensorData(tensor, static_cast<float *>(buffer));
-      break;
-    case DataType::S32:
-      readTensorData(tensor, static_cast<int32_t *>(buffer));
-      break;
-    case DataType::S64:
-      readTensorData(tensor, static_cast<int64_t *>(buffer));
-      break;
-    case DataType::BOOL:
-      readTensorData(tensor, static_cast<uint8_t *>(buffer));
-      break;
-    default:
-      throw std::runtime_error{"Unsupported data type for input data (.h5)"};
-  }
-}
-
-} // namespace record_minmax
diff --git a/compiler/record-minmax/src/HDF5Importer.h b/compiler/record-minmax/src/HDF5Importer.h
deleted file mode 100644 (file)
index 9e98c77..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __RECORD_MINMAX_HDF5IMPORTER_H__
-#define __RECORD_MINMAX_HDF5IMPORTER_H__
-
-#include <luci_interpreter/core/Tensor.h>
-
-#include <H5Cpp.h>
-
-#include <stdexcept>
-
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
-
-namespace record_minmax
-{
-
-// HDF5Importer reads an input data saved in the hdf5 file in the given path
-// The hierarchy of the hdf5 file is as follows.
-// Group "/"
-//  > Group "value"
-//    > Group <record_idx>
-//      > Dataset <input_idx>
-// record_idx : index of the record (dataset file can contain multiple records)
-// input_idx : index of the input (DNN model can have multiple inputs)
-// Ex: the j'th input of the i'th record can be accessed by "/value/i/j"
-class HDF5Importer
-{
-public:
-  explicit HDF5Importer(const std::string &path)
-  {
-    if (_file.isHdf5(path) == false)
-      throw std::runtime_error("Given data file is not HDF5");
-
-    _file = H5::H5File(path, H5F_ACC_RDONLY);
-  }
-
-public:
-  /**
-   * @brief importGroup has to be called before readTensor is called
-   *        Otherwise, readTensor will throw an exception
-   */
-  void importGroup() { _value_grp = _file.openGroup("value"); }
-
-  /**
-   * @brief Read tensor data from file and store it into buffer
-   * @details A tensor in the file can be retrieved with (record_idx, input_idx)
-   * @param record_idx : index of the record
-   * @param input_idx : index of the input
-   * @param dtype : pointer to write the tensor's data type
-   * @param shape : pointer to write the tensor's shape
-   * @param buffer : pointer to write the tensor's data
-   */
-  void readTensor(int32_t record_idx, int32_t input_idx, DataType *dtype, Shape *shape,
-                  void *buffer);
-
-  // Read a raw tensor (no type/shape is specified)
-  void readTensor(int32_t record_idx, int32_t input_idx, void *buffer);
-
-  bool isRawData() { return _value_grp.attrExists("rawData"); }
-
-  int32_t numRecords() { return _value_grp.getNumObjs(); }
-
-  int32_t numInputs(int32_t record_idx);
-
-private:
-  H5::H5File _file;
-  H5::Group _value_grp;
-};
-
-} // namespace record_minmax
-
-#endif // __RECORD_MINMAX_HDF5IMPORTER_H__
index 28ae2b33ba35aca71f7dcecbf421890c5cd49fed..8288d3e5e6affe8e32bc7e03e64ce0a07c2e4ac7 100644 (file)
@@ -51,6 +51,16 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
     // Bool type tensor is not quantized
     return;
   }
+  if (node->dtype() == DataType::S32)
+  {
+    // Integer type tensor is not quantized
+    return;
+  }
+  if (node->dtype() == DataType::S64)
+  {
+    // Integer type tensor is not quantized
+    return;
+  }
 
   // Only support recording of float32 values
   if (tensor->element_type() != DataType::FLOAT32)
@@ -58,9 +68,6 @@ void MinMaxObserver::postTensorWrite(const luci::CircleNode *node,
     // Exceptions that should be processed in backends
     switch (node->opcode())
     {
-      case luci::CircleOpcode::ARG_MAX:
-        // Output of arg_max is the index of the largest value across axes of a tensor.
-        // It always has integer type.
       case luci::CircleOpcode::CAST:
         // Cast is quantized only if it converts <type> -> float.
         // Other cases should be processed in backends.
index c249960f86514c038e789f6d5a8667062af88887..10a14516f73fc576049aaaaf2288f1c86b08429c 100644 (file)
 #include "RecordMinMax.h"
 #include "RecordFunction.h"
 #include "MinMaxObserver.h"
-#include "HDF5Importer.h"
 
 #include <luci/Importer.h>
 #include <luci/CircleExporter.h>
 #include <luci/CircleFileExpContract.h>
 #include <luci/IR/CircleQuantParam.h>
+#include <dio_hdf5/HDF5Importer.h>
 
 #include <dirent.h>
 #include <algorithm>
 #include <iostream>
 #include <random>
 
-using Shape = luci_interpreter::Shape;
-using DataType = luci_interpreter::DataType;
+using Shape = std::vector<loco::Dimension>;
+using DataType = loco::DataType;
 
 namespace
 {
 
+uint32_t numElements(const luci::CircleNode *node)
+{
+  uint32_t num_elements = 1;
+  for (uint32_t i = 0; i < node->rank(); i++)
+    num_elements *= node->dim(i).value();
+
+  return num_elements;
+}
+
+// Throw exception if input has one of the following conditions.
+// 1. Have unknown dimension
+// 2. Number of elements is 0
+void checkInputDimension(const luci::CircleInput *input)
+{
+  for (uint32_t i = 0; i < input->rank(); i++)
+    if (!input->dim(i).known())
+      throw std::runtime_error(input->name() + " has unknown dimension");
+
+  if (numElements(input) == 0)
+    throw std::runtime_error(input->name() + " is a zero-sized input");
+}
+
 void readDataFromFile(const std::string &filename, std::vector<char> &data, size_t data_size)
 {
   assert(data.size() == data_size); // FIX_CALLER_UNLESS
@@ -62,6 +84,21 @@ std::vector<uint8_t> genRandomBoolData(std::mt19937 &gen, uint32_t num_elements)
   return input_data;
 }
 
+template <typename T>
+std::vector<T> genRandomIntData(std::mt19937 &gen, uint32_t num_elements, T min, T max)
+{
+  std::uniform_int_distribution<T> dist(min, max);
+  std::vector<T> input_data(num_elements);
+
+  // Write random data
+  {
+    auto const generator = [&gen, &dist]() { return dist(gen); };
+    std::generate(begin(input_data), end(input_data), generator);
+  }
+
+  return input_data;
+}
+
 /**
  * @brief  getTensorSize will return size in bytes
  */
@@ -83,12 +120,12 @@ void verifyTypeShape(const luci::CircleInput *input_node, const DataType &dtype,
   if (dtype != input_node->dtype())
     throw std::runtime_error("Wrong input type.");
 
-  if (shape.num_dims() != input_node->rank())
+  if (shape.size() != input_node->rank())
     throw std::runtime_error("Input rank mismatch.");
 
-  for (uint32_t i = 0; i < shape.num_dims(); i++)
+  for (uint32_t i = 0; i < shape.size(); i++)
   {
-    if (shape.dim(i) != input_node->dim(i).value())
+    if (not(shape.at(i) == input_node->dim(i)))
       throw std::runtime_error("Input shape mismatch.");
   }
 }
@@ -188,6 +225,7 @@ void RecordMinMax::profileRawDataDirectory(const std::string &mode,
   for (auto input : input_nodes)
   {
     const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+    checkInputDimension(input_node);
     total_input_size += getTensorSize(input_node);
   }
 
@@ -254,6 +292,7 @@ void RecordMinMax::profileRawData(const std::string &mode, const std::string &in
   for (auto input : input_nodes)
   {
     const auto *input_node = loco::must_cast<const luci::CircleInput *>(input);
+    checkInputDimension(input_node);
     total_input_size += getTensorSize(input_node);
   }
 
@@ -296,12 +335,12 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
 {
   try
   {
-    HDF5Importer importer(input_data_path);
-    importer.importGroup();
+    dio::hdf5::HDF5Importer importer(input_data_path);
+    importer.importGroup("value");
 
     bool is_raw_data = importer.isRawData();
 
-    const auto num_records = importer.numRecords();
+    const auto num_records = importer.numData();
     if (num_records == 0)
       throw std::runtime_error("The input data file does not contain any record.");
 
@@ -319,12 +358,13 @@ void RecordMinMax::profileData(const std::string &mode, const std::string &input
       {
         const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
         assert(input_node->index() == input_idx);
+        checkInputDimension(input_node);
         std::vector<char> input_data(getTensorSize(input_node));
 
         if (!is_raw_data)
         {
           DataType dtype;
-          Shape shape(input_node->rank());
+          Shape shape;
           importer.readTensor(record_idx, input_idx, &dtype, &shape, input_data.data());
 
           // Check the type and the shape of the input data is valid
@@ -376,43 +416,47 @@ void RecordMinMax::profileDataWithRandomInputs(const std::string &mode, float mi
     {
       const auto *input_node = loco::must_cast<const luci::CircleInput *>(input_nodes[input_idx]);
       assert(input_node->index() == input_idx);
-      uint32_t num_elements = 1;
-      for (uint32_t i = 0; i < input_node->rank(); i++)
-      {
-        if (!input_node->dim(i).known())
-          throw std::runtime_error("Input dimension must be known");
+      checkInputDimension(input_node);
 
-        num_elements *= input_node->dim(i).value();
-      }
-
-      if (num_elements == 0)
-        throw std::runtime_error("Only support non-zero sized inputs");
+      const auto num_elements = numElements(input_node);
 
       // TODO Support more input data types
       assert(input_node->dtype() == loco::DataType::FLOAT32 ||
-             input_node->dtype() == loco::DataType::BOOL);
+             input_node->dtype() == loco::DataType::BOOL ||
+             input_node->dtype() == loco::DataType::S32 ||
+             input_node->dtype() == loco::DataType::S64);
 
       if (input_node->dtype() == DataType::FLOAT32)
-      // clang-format off
       {
-      std::vector<float> input_data(num_elements);
+        std::vector<float> input_data(num_elements);
 
-      // Write random data
-      for (auto &iter : input_data)
-        iter = static_cast<float>(dist(gen));
+        // Write random data
+        for (auto &iter : input_data)
+          iter = static_cast<float>(dist(gen));
 
-      // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
-      //       We can redcue the copy by directly writing data from file to interpreter inputs
-      _interpreter->writeInputTensor(input_node, input_data.data(),
-                                     input_data.size() * sizeof(float));
+        // TODO: Input data is copied twice (file -> buffer (input_data) -> interpreter inputs)
+        //       We can redcue the copy by directly writing data from file to interpreter inputs
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(float));
       }
-      // clang-format on
       else if (input_node->dtype() == DataType::BOOL)
       {
         auto input_data = genRandomBoolData(gen, num_elements);
         _interpreter->writeInputTensor(input_node, input_data.data(),
                                        input_data.size() * sizeof(uint8_t));
       }
+      else if (input_node->dtype() == DataType::S32)
+      {
+        auto input_data = genRandomIntData<int32_t>(gen, num_elements, 0, 100);
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(int32_t));
+      }
+      else if (input_node->dtype() == DataType::S64)
+      {
+        auto input_data = genRandomIntData<int64_t>(gen, num_elements, 0, 100);
+        _interpreter->writeInputTensor(input_node, input_data.data(),
+                                       input_data.size() * sizeof(int64_t));
+      }
     }
 
     _interpreter->interpret();
index ca7eddc6f8f46d771f75ca7a24a8d90a58057dfe..f57102f1f11d9bcebcb7ac7cbdf258a93368c2bd 100644 (file)
@@ -1,7 +1,7 @@
 nnas_find_package(Protobuf QUIET)
 
 if(NOT Protobuf_FOUND)
-  message(STATUS "Build souschef: FAILED (missing Protobuf")
+  message(STATUS "Build souschef: FAILED (missing Protobuf)")
   return()
 endif(NOT Protobuf_FOUND)
 
index 3e7e57747f9eefbf0db230e694cebe7c36aa6419..0b4739374f3d40a38411b44d3c5f0c6173eb3149 100644 (file)
@@ -72,7 +72,7 @@ list(APPEND TEST_DEPS "${TEST_RUNNER}")
 
 get_target_property(ARTIFACTS_BIN_PATH testDataGenerator BINARY_DIR)
 
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_1_13_2")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_8_0")
 
 ###
 ### Generate test.config
index 6ba55c357b5b3d69273df05ef98ab3a245534e13..9e1cb720f6572bd223e337ea1ea6874965133d8d 100644 (file)
@@ -10,5 +10,6 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES})
 target_include_directories(tfl-inspect PRIVATE src)
 target_link_libraries(tfl-inspect arser)
 target_link_libraries(tfl-inspect foder)
-target_link_libraries(tfl-inspect mio_tflite260)
+target_link_libraries(tfl-inspect mio_tflite280)
+target_link_libraries(tfl-inspect mio_tflite280_helper)
 target_link_libraries(tfl-inspect safemain)
index 9a7477b819f2ece0462fb54b718ae99de10e8af3..a11f6b2003eeb2b9f06504fd396da68771092db6 100644 (file)
@@ -1,4 +1,4 @@
 require("arser")
 require("foder")
-require("mio-tflite260")
+require("mio-tflite280")
 require("safemain")
index 41a8396bbfab294643c9e55a34c08da0ec9e897b..6c45295161531dee5f0d84061c41952f3c1eccf6 100644 (file)
@@ -16,6 +16,8 @@
 
 #include "Reader.h"
 
+#include <mio_tflite280/Helper.h>
+
 #include <cassert>
 #include <sstream>
 #include <string>
 namespace tflinspect
 {
 
-// This will provide v3/v3a format neutral BuiltinOperator
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-  int8_t dp_code = opcode->deprecated_builtin_code();
-  // 127 is max of int8_t which is upper bound of v3 builtin_code
-  // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
-  if (dp_code < 127 && dp_code >= 0)
-    return tflite::BuiltinOperator(dp_code);
-  return opcode->builtin_code();
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const tflite::OperatorCode *opcode)
-{
-  assert(opcode);
-
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    if (!opcode->custom_code())
-      return "(invalid custom)";
-
-    std::string custom_op = "CUSTOM(";
-    custom_op += opcode->custom_code()->c_str();
-    custom_op += ")";
-    return custom_op;
-  }
-
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return tflite::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
-  return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
-  static const char *kEmptyTensorName = "(noname)";
-
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-
-  return kEmptyTensorName;
-}
-
 Reader::Reader(const tflite::Model *model)
 {
   _subgraphs = model->subgraphs();
@@ -135,7 +71,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  return tflinspect::builtin_code_neutral(opcode);
+  return mio::tflite::builtin_code_neutral(opcode);
 }
 
 std::string Reader::opcode_name(const tflite::Operator *op) const
@@ -144,14 +80,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::tflite::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  return tflinspect::opcode_name(opcode);
+  return mio::tflite::opcode_name(opcode);
 }
 
 bool Reader::select_subgraph(uint32_t sgindex)
index 91b7bb940fc1e837cf1c5f4a56a9404a82b99287..98554cf85f76467d22119ab2aede98f0dac39201 100644 (file)
@@ -36,13 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-std::string opcode_name(const tflite::OperatorCode *opcode);
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
index a87d30c5eb684c87897090d09305553039a68532..2fba335ea29f95247c6ca4584f89bd1ec0685846 100644 (file)
@@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES})
 target_include_directories(tfl-verify PRIVATE src)
 target_link_libraries(tfl-verify arser)
 target_link_libraries(tfl-verify foder)
-target_link_libraries(tfl-verify mio_tflite260)
+target_link_libraries(tfl-verify mio_tflite280)
 target_link_libraries(tfl-verify safemain)
 target_link_libraries(tfl-verify cwrap)
index 72803d890eed6f6f152ef206b9a172de8c24ec9e..b107bdfe7155b5030a3836eafa94d3b0ae264341 100644 (file)
@@ -1,5 +1,5 @@
 require("arser")
 require("foder")
-require("mio-tflite260")
+require("mio-tflite280")
 require("safemain")
 require("cwrap")
index ac7fe4b7c3fb381a47764308f596d4c161949fb2..948b1cecdd0a9b8ba6b59b7b98e559365f936ba2 100644 (file)
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
   return()
 endif(NOT Protobuf_FOUND)
 
-if(NOT TARGET mio_tflite260)
-  message(STATUS "Build tflchef: FAILED (missing mio_tflite260)")
+if(NOT TARGET mio_tflite280)
+  message(STATUS "Build tflchef: FAILED (missing mio_tflite280)")
   return()
-endif(NOT TARGET mio_tflite260)
+endif(NOT TARGET mio_tflite280)
 
 # Recipe Parser
 add_subdirectory(proto)
index 413b78b15a1b745b6a2dc057e09b8366d4879ced..6b6fed57bbf997ec72964f533a807bcff324a5da 100644 (file)
@@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include)
 target_include_directories(tflchef_core PRIVATE src)
 target_link_libraries(tflchef_core tflchef_proto)
 target_link_libraries(tflchef_core tflchef_log)
-target_link_libraries(tflchef_core mio_tflite260)
+target_link_libraries(tflchef_core mio_tflite280)
 target_link_libraries(tflchef_core souschef)
index ada5ff5d1758da395eb889edd5910c5d428007b2..93b9334a674f4c86cafcde5e432fb5515b3ff13b 100644 (file)
@@ -722,15 +722,13 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
 
     auto inputs = flatbuffer_builder->CreateVector(tensormap_inputs);
     auto outputs = flatbuffer_builder->CreateVector(tensormap_outputs);
-    auto method_name = flatbuffer_builder->CreateString(rec_signature_def.method_name());
-    auto key = flatbuffer_builder->CreateString(rec_signature_def.key());
-    // TODO add validation for method_name and key
+    auto signature_key = flatbuffer_builder->CreateString(rec_signature_def.signature_key());
+    // TODO add validation for signature_key
 
     ::tflite::SignatureDefBuilder signature_def_builder{*flatbuffer_builder};
     signature_def_builder.add_inputs(inputs);
     signature_def_builder.add_outputs(outputs);
-    signature_def_builder.add_method_name(method_name);
-    signature_def_builder.add_key(key);
+    signature_def_builder.add_signature_key(signature_key);
     signature_def_builder.add_subgraph_index(rec_signature_def.subgraph_index());
 
     signdef_vec.emplace_back(signature_def_builder.Finish());
index 45269916c852bbb6679c69676dd2a545599311aa..7173a67ba22af140abb6fd6b264ddd519cc1d841 100644 (file)
@@ -29,6 +29,7 @@ flatbuffers::Offset<void> FullyConnectedChef::value(flatbuffers::FlatBufferBuild
 
   tflite::FullyConnectedOptionsBuilder fc_options_builder{fbb};
   fc_options_builder.add_fused_activation_function(tflite_activation);
+  fc_options_builder.add_keep_num_dims(operation.fullyconnected_options().keep_num_dims());
 
   return fc_options_builder.Finish().Union();
 }
diff --git a/compiler/tflchef/core/src/Op/SVDF.cpp b/compiler/tflchef/core/src/Op/SVDF.cpp
new file mode 100644 (file)
index 0000000..690896c
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDF.h"
+#include "Convert.h"
+
+#include <cassert>
+
+flatbuffers::Offset<void> SVDFChef::value(flatbuffers::FlatBufferBuilder &fbb) const
+{
+  assert(_operation->has_svdf_options());
+
+  const auto &svdf_options = _operation->svdf_options();
+
+  const auto tflite_activation = as_tflite_activation(svdf_options.activation());
+
+  tflite::SVDFOptionsBuilder svdf_options_builder{fbb};
+  svdf_options_builder.add_fused_activation_function(tflite_activation);
+  svdf_options_builder.add_asymmetric_quantize_inputs(svdf_options.asymmetric_quantize_inputs());
+  svdf_options_builder.add_rank(svdf_options.rank());
+
+  return svdf_options_builder.Finish().Union();
+}
+
+std::unique_ptr<OpChef> SVDFChefFactory::create(const tflchef::Operation *operation) const
+{
+  return std::unique_ptr<OpChef>{new SVDFChef{operation}};
+}
diff --git a/compiler/tflchef/core/src/Op/SVDF.h b/compiler/tflchef/core/src/Op/SVDF.h
new file mode 100644 (file)
index 0000000..9bf0b6e
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __OP_SVDF_H__
+#define __OP_SVDF_H__
+
+#include "OpChef.h"
+
+class SVDFChef final : public OpChef
+{
+public:
+  explicit SVDFChef(const tflchef::Operation *operation) : _operation{operation}
+  {
+    // DO NOTHING
+  }
+
+public:
+  tflite::BuiltinOperator code(void) const override { return tflite::BuiltinOperator_SVDF; }
+
+  tflite::BuiltinOptions type(void) const override { return tflite::BuiltinOptions_SVDFOptions; }
+
+  flatbuffers::Offset<void> value(flatbuffers::FlatBufferBuilder &fbb) const override;
+
+private:
+  const tflchef::Operation *_operation;
+};
+
+struct SVDFChefFactory final : public OpChefFactory
+{
+  std::unique_ptr<OpChef> create(const tflchef::Operation *operation) const override;
+};
+
+#endif // __OP_SVDF_H__
index b1e8a382926f413f75f42cc40a3b384e868b88b9..beebd359fa6e6cb3d60053c968e4bbc870f2e7f7 100644 (file)
@@ -104,6 +104,7 @@ OP_CHEF(Squeeze, SqueezeChefFactory)
 OP_CHEF(StridedSlice, StridedSliceChefFactory)
 OP_CHEF(Sub, SubChefFactory)
 OP_CHEF(Sum, SumChefFactory)
+OP_CHEF(SVDF, SVDFChefFactory)
 OP_CHEF(Tanh, TanhChefFactory)
 OP_CHEF(Tile, TileChefFactory)
 OP_CHEF(TopKV2, TopKV2ChefFactory)
index 35688ba9544f00b2e4500ef4ed9cc20a04c67df5..159019abf45c4a3d04e46070b5d543afecb01e06 100644 (file)
 #include "Op/StridedSlice.h"
 #include "Op/Sub.h"
 #include "Op/Sum.h"
+#include "Op/SVDF.h"
 #include "Op/Tanh.h"
 #include "Op/Tile.h"
 #include "Op/TopKV2.h"
index 4162cb1233eea3c10920c6fca0789d4a8c336812..1abefafe1dc216d61632986c2b3a900168e5ac79 100644 (file)
@@ -182,6 +182,7 @@ message FloorModOptions {
 
 message FullyConnectedOptions {
   optional Activation activation = 1 [default = NONE];
+  optional bool keep_num_dims = 2 [ default = false ];
 }
 
 message AddOptions {
@@ -366,6 +367,12 @@ message SquaredDifferenceOptions {
   // None
 }
 
+message SVDFOptions {
+  optional int32 rank = 1 [default = 0];
+  optional Activation activation = 2 [default = NONE];
+  optional bool asymmetric_quantize_inputs = 3 [default = false];
+}
+
 message FillOptions {
   // None
 }
@@ -589,7 +596,7 @@ message Operation {
   optional ZerosLikeOptions zeros_like_options = 153;
   // ConcatEmbeddingsOptions 154
   // LSHProjectionOptions 155
-  // SVDFOptions 156
+  optional SVDFOptions svdf_options = 156;
   // RNNOptions 157
   optional L2NormOptions l2norm_options = 158;
   optional LocalResponseNormalizationOptions local_response_normalization_options = 159;
@@ -658,8 +665,8 @@ message TensorMap {
 message SignatureDef {
   repeated TensorMap inputs = 4;
   repeated TensorMap outputs = 5;
-  optional string method_name = 6;
-  optional string key = 10;
+  optional string signature_key = 6;
+  // optional string key = 10; obsolete in TF2.8.0
   optional uint32 subgraph_index = 12;
 }
 
index 78bfa2d076188fffd53d50adfab262dc292ad2ed..a01da4258b06a0cf6e82038e42e5a9411c738f91 100644 (file)
@@ -1,7 +1,7 @@
 require("arser")
 require("nnkit")
 require("cwrap")
-require("mio-tflite260")
+require("mio-tflite280")
 require("safemain")
 require("hermes")
 require("hermes-std")
index 5c4dff012a46f3df847c2bfb779158938e2ec900..26cf67f4fd51c6b41a202fb942cb1ba18fa6a689 100644 (file)
@@ -1,10 +1,11 @@
-if(NOT TARGET nnkit-run)
-  return()
-endif(NOT TARGET nnkit-run)
-
-if(NOT TARGET nnkit_tflite_backend)
-  return()
-endif(NOT TARGET nnkit_tflite_backend)
+set(TFLCHEF_FILE_PATH $<TARGET_FILE:tflchef-file>)
+set(TFLCHEF_REVERSE_PATH $<TARGET_FILE:tflchef-reverse>)
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  # TODO use better way to represent path for host executable
+  set(TFLCHEF_FILE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/file/tflchef-file)
+  set(TFLCHEF_REVERSE_PATH $ENV{BUILD_HOST_EXEC}/compiler/tflchef/tools/reverse/tflchef-reverse)
+  message(STATUS "TFLCHEF_FILE_PATH = ${TFLCHEF_FILE_PATH}")
+endif(DEFINED ENV{BUILD_HOST_EXEC})
 
 nncc_find_resource(TensorFlowLiteRecipes)
 set(TENSORFLOWLITERECIPES_DIR "${TensorFlowLiteRecipes_DIR}")
@@ -26,8 +27,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   # Generate .tflite
   add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
-                     COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
-                     DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+                     COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+                     DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
                      COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
 
   list(APPEND TESTS ${RECIPE_PREFIX})
@@ -52,8 +53,8 @@ foreach(RECIPE IN ITEMS ${RECIPES})
 
   # Generate .tflite
   add_custom_command(OUTPUT ${RECIPE_OUTPUT_FILE}
-                     COMMAND tflchef-file ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
-                     DEPENDS tflchef-file ${RECIPE_SOURCE_FILE}
+                     COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE} ${RECIPE_OUTPUT_FILE}
+                     DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_SOURCE_FILE}
                      COMMENT "Generating ${RECIPE_OUTPUT_FILE}")
 
   list(APPEND TESTS ${RECIPE_PREFIX})
@@ -76,16 +77,16 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
 
   # Generate .gen.recipe from generated .tflite
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
-                     COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
-                     DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+                     COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+                     DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
 
   # now we are going to generate .gen.tflite from .gen.recipe
   # to check generated .gen.recipe file is correct by using it.
   # as weight values may be different, binary comparision is not acceptable.
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
-                     COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
-                     DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+                     COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+                     DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
 
   list(APPEND TESTS ${TFLITE_PREFIX}.gen)
@@ -104,13 +105,13 @@ foreach(TFLITEFILE IN ITEMS ${GEN_TFLITEFILES})
 
   # Generate .gen.recipe from generated .tflite
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE}
-                     COMMAND tflchef-reverse ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
-                     DEPENDS tflchef-reverse ${RECIPE_OUTPUT_FILE}
+                     COMMAND ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE}
+                     DEPENDS ${TFLCHEF_REVERSE_PATH} ${RECIPE_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE}")
 
   add_custom_command(OUTPUT ${RECIPE_GEN_OUTPUT_FILE2}
-                     COMMAND tflchef-file ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
-                     DEPENDS tflchef-file ${RECIPE_GEN_OUTPUT_FILE}
+                     COMMAND ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE} ${RECIPE_GEN_OUTPUT_FILE2}
+                     DEPENDS ${TFLCHEF_FILE_PATH} ${RECIPE_GEN_OUTPUT_FILE}
                      COMMENT "Generating ${RECIPE_GEN_OUTPUT_FILE2}")
 
   list(APPEND TESTS ${TFLITE_PREFIX}.gen)
@@ -123,7 +124,9 @@ add_custom_target(tflchef_testfiles ALL DEPENDS ${TESTFILES})
 
 # Using mio_tflite_validate for temporary as it only calls flatbuffer validate
 # TODO do testing with running the model with runtime/interpreter
+# NOTE for ARM32 cross build, $<TARGET_FILE:mio_tflite280_validate> is used as-is
+#      as test should run in ARM32 device
 add_test(NAME tflchef_test
          COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/runvalidate.sh"
-                 $<TARGET_FILE:mio_tflite_validate>
+                 $<TARGET_FILE:mio_tflite280_validate>
                  ${TESTS})
index 4481752efa1585b69e4b2b4221b1280c498996d6..9e95edf00a7c54d621debcf7e965d4a379929355 100644 (file)
@@ -50,8 +50,7 @@ signature_def {
     name: "ofm1"
     tensor_index: 1
   }
-  method_name: "serving_default"
-  key: "serv"
+  signature_key: "serving_default"
   subgraph_index: 0
 }
 input: "ifm"
index 79be251385998dd03ec8a805c42bdc6b1ec40788..4847f7dd88210b6a6fce1c4027398ff817b1b716 100644 (file)
@@ -50,8 +50,7 @@ signature_def {
     name: "out1"
     tensor: "ofm1"
   }
-  method_name: "serving_default"
-  key: "serv"
+  signature_key: "serving_default"
   subgraph_index: 0
 }
 input: "ifm"
index 3c4c3fff65c266a1f73472d778f0ac769acf894f..3c3352b0a2649244a41a11b97ced39b56b542511 100644 (file)
@@ -4,6 +4,7 @@ add_library(tflchef_tflite STATIC ${SOURCES})
 target_include_directories(tflchef_tflite PUBLIC include)
 target_include_directories(tflchef_tflite PRIVATE src)
 target_link_libraries(tflchef_tflite tflchef_proto)
-target_link_libraries(tflchef_tflite mio_tflite260)
+target_link_libraries(tflchef_tflite mio_tflite280)
+target_link_libraries(tflchef_tflite mio_tflite280_helper)
 target_link_libraries(tflchef_tflite cwrap)
 target_link_libraries(tflchef_tflite souschef)
index 1f6e73aa67f4ada10fd4ba6dacc738a886c8325c..bbc749fe4d716999241a0da15084107b3dd46ff1 100644 (file)
@@ -48,6 +48,7 @@ tflchef::Operation *TFliteOpFullyConnected::build(const tflite::Operator *op, TF
   auto op_options = operation->mutable_fullyconnected_options();
 
   op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+  op_options->set_keep_num_dims(op_params->keep_num_dims());
 
   return operation;
 }
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.cpp b/compiler/tflchef/tflite/src/Op/SVDF.cpp
new file mode 100644 (file)
index 0000000..015f968
--- /dev/null
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDF.h"
+
+#include "Convert.h"
+
+namespace tflchef
+{
+
+void TFliteOpSVDF::filler(const tflite::Operator *op, TFliteImport *import,
+                          tflchef::ModelRecipe *model_recipe) const
+{
+  const std::vector<int32_t> &inputs = as_index_vector(op->inputs());
+  assert(inputs.size() == 5);
+
+  // optional input tensor idx has minus value.
+  const bool hasBias = (inputs.at(3) >= 0);
+
+  // Note: last input is variable tensor without data
+  import->set_tensor_filler(inputs.at(1));
+  import->set_tensor_filler(inputs.at(2));
+  if (hasBias)
+    import->set_tensor_filler(inputs.at(3));
+}
+
+tflchef::Operation *TFliteOpSVDF::build(const tflite::Operator *op, TFliteImport *import,
+                                        tflchef::ModelRecipe *model_recipe) const
+{
+  const auto op_params = op->builtin_options_as_SVDFOptions();
+  assert(op_params != nullptr);
+
+  auto operation = model_recipe->add_operation();
+
+  operation->set_type("SVDF");
+
+  auto op_options = operation->mutable_svdf_options();
+
+  op_options->set_activation(as_tflchef_activation(op_params->fused_activation_function()));
+  op_options->set_asymmetric_quantize_inputs(op_params->asymmetric_quantize_inputs());
+  op_options->set_rank(op_params->rank());
+
+  return operation;
+}
+
+} // namespace tflchef
diff --git a/compiler/tflchef/tflite/src/Op/SVDF.h b/compiler/tflchef/tflite/src/Op/SVDF.h
new file mode 100644 (file)
index 0000000..a59ca54
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TFLITE_OP_SVDF_H__
+#define __TFLITE_OP_SVDF_H__
+
+#include "TFliteOpChef.h"
+
+namespace tflchef
+{
+
+/**
+ * @brief tflchef operator builder for SVDF
+ */
+class TFliteOpSVDF : public TFliteOpChef
+{
+public:
+  void filler(const tflite::Operator *op, TFliteImport *import,
+              tflchef::ModelRecipe *model_recipe) const override;
+  tflchef::Operation *build(const tflite::Operator *op, TFliteImport *import,
+                            tflchef::ModelRecipe *model_recipe) const override;
+};
+
+} // namespace tflchef
+
+#endif // __TFLITE_OP_SVDF_H__
index d9215a4c45843986a27bc46f688d36f7151f3624..0701707c1c9ed2c3d85b0310795dedddb63f2b0a 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <tflchef/RecipeChef.h>
+#include <mio_tflite280/Helper.h>
 
 #include "Convert.h"
 #include "TFliteImport.h"
@@ -42,7 +43,7 @@ void set_inputs(TFliteImport *import, tflchef::Operation *operation, const tflit
     else
     {
       auto tensor = tensors->Get(input);
-      std::string name = tensor_name(tensor);
+      std::string name = mio::tflite::tensor_name(tensor);
       operation->add_input(name);
     }
   }
@@ -56,7 +57,7 @@ void set_outputs(TFliteImport *import, tflchef::Operation *operation, const tfli
   for (auto output : outputs)
   {
     auto tensor = tensors->Get(output);
-    std::string name = tensor_name(tensor);
+    std::string name = mio::tflite::tensor_name(tensor);
     operation->add_output(name);
   }
 }
@@ -108,7 +109,7 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
 
     ::tflchef::Operand *operand = model_recipe->add_operand();
 
-    operand->set_name(tensor_name(tensor));
+    operand->set_name(mio::tflite::tensor_name(tensor));
     operand->set_type(as_tflchef_type(tensor->type()));
     operand->set_is_variable(tensor->is_variable());
 
@@ -311,14 +312,14 @@ std::unique_ptr<ModelRecipe> generate_recipe(const tflite::Model *model)
   for (const auto input : inputs)
   {
     auto tensor = tensors->Get(input);
-    std::string name = tensor_name(tensor);
+    std::string name = mio::tflite::tensor_name(tensor);
 
     model_recipe->add_input(name);
   }
   for (const auto output : outputs)
   {
     auto tensor = tensors->Get(output);
-    std::string name = tensor_name(tensor);
+    std::string name = mio::tflite::tensor_name(tensor);
 
     model_recipe->add_output(name);
   }
index 1462ee7f434e8a8361ea513438d3a9799baadd15..7114ab019a72b6c0c9d6de4581dcd5b0eacdd05e 100644 (file)
 
 #include "Convert.h"
 
+#include <mio_tflite280/Helper.h>
+
 #include <sstream>
 
 namespace tflchef
 {
 
-const char *kEmptyTensorName = "(noname)";
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
-  return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-  return kEmptyTensorName;
-}
-
-// This will provide v3/v3a format neutral BuiltinOperator
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-  int8_t dp_code = opcode->deprecated_builtin_code();
-  // 127 is max of int8_t which is upper bound of v3 builtin_code
-  // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
-  if (dp_code < 127 && dp_code >= 0)
-    return tflite::BuiltinOperator(dp_code);
-  return opcode->builtin_code();
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
 TFliteImport::TFliteImport(const tflite::Model *model)
 {
   _subgraphs = model->subgraphs();
@@ -104,7 +67,7 @@ tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  return builtin_code_neutral(opcode);
+  return mio::tflite::builtin_code_neutral(opcode);
 }
 
 std::string TFliteImport::opcode_name(const tflite::Operator *op) const
@@ -113,14 +76,14 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::tflite::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  if (is_custom(opcode))
+  if (mio::tflite::is_custom(opcode))
   {
     if (!opcode->custom_code())
       return "(invalid custom)";
@@ -128,7 +91,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
     return opcode->custom_code()->c_str();
   }
 
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
+  tflite::BuiltinOperator code = mio::tflite::builtin_code_neutral(opcode);
   return EnumNameBuiltinOperator(code);
 }
 
index 43b5bbaffc02af172510f61c81da58daa0c11462..e6722e455346011244a3e14849b7769ffa30be72 100644 (file)
@@ -34,12 +34,6 @@ using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>
 using TFliteBuffers_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Buffer>>;
 using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
 
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
index 26ada7d0a53b1bf0d84f435c477ea33b4fe45bd2..b38b35a6166df31fb4b8b15328948c15046bb9a7 100644 (file)
 #include "Op/StridedSlice.h"
 #include "Op/Sub.h"
 #include "Op/Sum.h"
+#include "Op/SVDF.h"
 #include "Op/Tanh.h"
 #include "Op/Tile.h"
 #include "Op/TopKV2.h"
index 06394ddfa62834dcb845789ef43466517a96a876..4cbe7cfcb5caaac386b1cfa750b826001e880bc0 100644 (file)
@@ -154,6 +154,7 @@ private:
     REG_TFL_OP(STRIDED_SLICE, TFliteOpStridedSlice);
     REG_TFL_OP(SUB, TFliteOpSub);
     REG_TFL_OP(SUM, TFliteOpSum);
+    REG_TFL_OP(SVDF, TFliteOpSVDF);
     REG_TFL_OP(TANH, TFliteOpTanh);
     REG_TFL_OP(TILE, TFliteOpTile);
     REG_TFL_OP(TOPK_V2, TFliteOpTopKV2);
index 83f7febad32e18bc39ef90f31237e1ed8231aee6..fac0be6bf0181844aa0a695d3e30f6deff1ce361 100644 (file)
@@ -1,7 +1,7 @@
-if(NOT TARGET mio_tflite260)
-  message(STATUS "Build tfldump: FAILED (missing mio_tflite260)")
+if(NOT TARGET mio_tflite280)
+  message(STATUS "Build tfldump: FAILED (missing mio_tflite280)")
   return()
-endif(NOT TARGET mio_tflite260)
+endif(NOT TARGET mio_tflite280)
 
 set(DRIVER "driver/Driver.cpp")
 
@@ -10,6 +10,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 add_executable(tfldump ${DRIVER} ${SOURCES})
 target_include_directories(tfldump PRIVATE include)
 target_link_libraries(tfldump arser)
-target_link_libraries(tfldump mio_tflite260)
+target_link_libraries(tfldump mio_tflite280)
+target_link_libraries(tfldump mio_tflite280_helper)
 target_link_libraries(tfldump safemain)
-target_link_libraries(tfldump flatbuffers-1.12)
index d0f9cccba0bbf6b15ff0809be7db2a4d3d41090a..b1abf94861686900eb29e61f931444c97268dfff 100644 (file)
@@ -1,3 +1,3 @@
 require("arser")
-require("mio-tflite260")
+require("mio-tflite280")
 require("safemain")
index 2351e4c3d91ff40eb2281ee770baa9cb10226444..2a87e47d755148bfb00b9a268571548b848636a7 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <tfldump/Dump.h>
+#include <mio_tflite280/Helper.h>
 
 #include "Read.h"
 #include "OpPrinter.h"
@@ -127,7 +128,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
 
   // dump operands(tensors)
   os << "Operands: T(subgraph index : tensor index) TYPE (shape) (shape_signature) "
-     << "B(buffer index) OperandName" << std::endl;
+     << "B(buffer index) (variable) OperandName" << std::endl;
   for (uint32_t i = 0; i < tensors->Length(); ++i)
   {
     // TODO refactor to some better structure
@@ -137,7 +138,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
     if (tensor->shape())
       dims = tflread::as_index_vector(tensor->shape());
 
-    os << "T(" << reader.subgraph_index() << ":" << i << ") " << tflread::tensor_type(tensor)
+    os << "T(" << reader.subgraph_index() << ":" << i << ") " << mio::tflite::tensor_type(tensor)
        << " ";
     os << "(" << dims << ") ";
     if (tensor->shape_signature())
@@ -146,7 +147,11 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
       os << "(" << dims_sig << ") ";
     }
     os << "B(" << tensor->buffer() << ") ";
-    os << tflread::tensor_name(tensor) << std::endl;
+    if (tensor->is_variable())
+    {
+      os << "(variable) ";
+    }
+    os << mio::tflite::tensor_name(tensor) << std::endl;
 
     if (auto q_params = tensor->quantization())
     {
@@ -298,7 +303,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
       if (input >= 0)
       {
         auto tensor = tensors->Get(input);
-        os << tflread::tensor_name(tensor);
+        os << mio::tflite::tensor_name(tensor);
       }
       os << std::endl;
     }
@@ -308,7 +313,7 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
       if (output >= 0)
       {
         auto tensor = tensors->Get(output);
-        os << tflread::tensor_name(tensor);
+        os << mio::tflite::tensor_name(tensor);
       }
       os << std::endl;
     }
@@ -321,14 +326,14 @@ void dump_sub_graph(std::ostream &os, tflread::Reader &reader)
   for (const auto input : reader.inputs())
   {
     auto tensor = tensors->Get(input);
-    std::string name = tflread::tensor_name(tensor);
+    std::string name = mio::tflite::tensor_name(tensor);
     os << "I T(" << reader.subgraph_index() << ":" << input << ") " << name << std::endl;
   }
 
   for (const auto output : reader.outputs())
   {
     auto tensor = tensors->Get(output);
-    std::string name = tflread::tensor_name(tensor);
+    std::string name = mio::tflite::tensor_name(tensor);
     os << "O T(" << reader.subgraph_index() << ":" << output << ") " << name << std::endl;
   }
 
@@ -360,7 +365,7 @@ void dump_model(std::ostream &os, const tflite::Model *model)
     tflite::BuiltinOperator op_code = opcode->builtin_code();
     tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code());
 
-    auto op_name = tflread::opcode_name(opcode);
+    auto op_name = mio::tflite::opcode_name(opcode);
     auto op_version = opcode->version();
 
     os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
@@ -405,9 +410,8 @@ void dump_model(std::ostream &os, const tflite::Model *model)
     for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
     {
       auto sign_i = signaturedefs->Get(i);
-      os << "S(" << i << ") method_name(" << sign_i->method_name()->c_str() << "), key("
-         << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")"
-         << std::endl;
+      os << "S(" << i << ") signature_key(" << sign_i->signature_key()->c_str() << "), sub_graph("
+         << sign_i->subgraph_index() << ")" << std::endl;
 
       auto inputs_i = sign_i->inputs();
       for (uint32_t t = 0; t < inputs_i->Length(); ++t)
index fe04a5dd6e6bcdf788e58800b696691e18a834a0..d2f6e06f1a61d3e164aec2f9a9eb3a0a35218d8d 100644 (file)
@@ -76,7 +76,7 @@ public:
   {
     if (_value != -1)
     {
-      // Close on descturction
+      // Close on destruction
       close(_value);
     }
   }
index 90cba71730fdf1dd34a4c1f7ee5aed47bd92d0fd..47edcb0867d0db3b34303b475610bc9c2aa35f9d 100644 (file)
@@ -602,6 +602,23 @@ public:
   }
 };
 
+class SVDFPrinter : public OpPrinter
+{
+public:
+  void options(const tflite::Operator *op, std::ostream &os) const override
+  {
+    if (auto *params = op->builtin_options_as_SVDFOptions())
+    {
+      os << "    ";
+      os << "rank(" << params->rank() << ") ";
+      os << "activation(" << EnumNameActivationFunctionType(params->fused_activation_function())
+         << ") ";
+      os << "asymmetric_quantize_inputs(" << params->asymmetric_quantize_inputs() << ") ";
+      os << std::endl;
+    }
+  }
+};
+
 class TransposeConvPrinter : public OpPrinter
 {
 public:
@@ -776,6 +793,7 @@ OpPrinterRegistry::OpPrinterRegistry()
   _op_map[tflite::BuiltinOperator_STRIDED_SLICE] = make_unique<StridedSlicePrinter>();
   _op_map[tflite::BuiltinOperator_SUB] = make_unique<SubPrinter>();
   _op_map[tflite::BuiltinOperator_SUM] = make_unique<ReducerPrinter>();
+  _op_map[tflite::BuiltinOperator_SVDF] = make_unique<SVDFPrinter>();
   _op_map[tflite::BuiltinOperator_TRANSPOSE_CONV] = make_unique<TransposeConvPrinter>();
   // There is no Option for TOPK_V2
   _op_map[tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM] =
index 8b3a96e837d481a05a97ecc4d1b5fe5bdebb373e..454e3a8a16a01bc730fabcee5a2674aa4007219d 100644 (file)
 
 #include "Read.h"
 
+#include <mio_tflite280/Helper.h>
+
 #include <sstream>
 #include <string>
 
 namespace tflread
 {
 
-// This will provide v3/v3a format neutral BuiltinOperator
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-  int8_t dp_code = opcode->deprecated_builtin_code();
-  if (dp_code < 127 && dp_code >= 0)
-    return tflite::BuiltinOperator(dp_code);
-  return opcode->builtin_code();
-}
-
-bool is_valid(const tflite::OperatorCode *opcode)
-{
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
-}
-
-bool is_custom(const tflite::OperatorCode *opcode)
-{
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return (code == tflite::BuiltinOperator_CUSTOM);
-}
-
-std::string opcode_name(const tflite::OperatorCode *opcode)
-{
-  assert(opcode);
-
-  if (!is_valid(opcode))
-  {
-    std::ostringstream oss;
-    oss << "(invalid)";
-    return oss.str();
-  }
-
-  if (is_custom(opcode))
-  {
-    if (!opcode->custom_code())
-      return "(invalid custom)";
-
-    std::string custom_op = "CUSTOM(";
-    custom_op += opcode->custom_code()->c_str();
-    custom_op += ")";
-    return custom_op;
-  }
-
-  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
-  return tflite::EnumNameBuiltinOperator(code);
-}
-
-const char *tensor_type(const tflite::Tensor *tensor)
-{
-  return tflite::EnumNameTensorType(tensor->type());
-}
-
-const char *tensor_name(const tflite::Tensor *tensor)
-{
-  static const char *kEmptyTensorName = "(noname)";
-
-  auto name = tensor->name();
-  if (name)
-    return name->c_str();
-
-  return kEmptyTensorName;
-}
-
 Reader::Reader(const tflite::Model *model)
 {
   _version = model->version();
@@ -129,7 +67,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  return tflread::builtin_code_neutral(opcode);
+  return mio::tflite::builtin_code_neutral(opcode);
 }
 
 std::string Reader::opcode_name(const tflite::Operator *op) const
@@ -138,14 +76,14 @@ std::string Reader::opcode_name(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  if (!is_valid(opcode))
+  if (!mio::tflite::is_valid(opcode))
   {
     std::ostringstream oss;
     oss << "(invalid: " << index << ")";
     return oss.str();
   }
 
-  return tflread::opcode_name(opcode);
+  return mio::tflite::opcode_name(opcode);
 }
 
 bool Reader::select_subgraph(uint32_t sgindex)
index 80f317d0b929f43a88b347bb58fc34657b535a73..1ae63877fb90241a9d06ec7912a98ad3f2265f5e 100644 (file)
@@ -36,13 +36,6 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
-bool is_valid(const tflite::OperatorCode *opcode);
-bool is_custom(const tflite::OperatorCode *opcode);
-std::string opcode_name(const tflite::OperatorCode *opcode);
-const char *tensor_type(const tflite::Tensor *tensor);
-const char *tensor_name(const tflite::Tensor *tensor);
-
 /**
  * @brief Loads TF lite file and provides helpers to access attributes
  */
index 4ea01ad3157a4a38ca0852e29524279f825d2a5d..a317a63059c4b11800837c0969d9164d5ad0c410 100644 (file)
@@ -1,8 +1,8 @@
 nnas_include(TargetRequire)
 
 unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS mio_tflite260)
-list(APPEND REQUIRED_TARGETS mio_circle)
+list(APPEND REQUIRED_TARGETS mio_tflite280)
+list(APPEND REQUIRED_TARGETS mio_circle04)
 TargetRequire_Return(${REQUIRED_TARGETS})
 
 set(DRIVER "driver/Driver.cpp")
@@ -13,8 +13,9 @@ target_include_directories(tflite2circle PRIVATE src)
 target_link_libraries(tflite2circle arser)
 target_link_libraries(tflite2circle foder)
 target_link_libraries(tflite2circle safemain)
-target_link_libraries(tflite2circle mio_tflite260)
-target_link_libraries(tflite2circle mio_circle)
+target_link_libraries(tflite2circle mio_tflite280)
+target_link_libraries(tflite2circle mio_tflite280_helper)
+target_link_libraries(tflite2circle mio_circle04)
 target_link_libraries(tflite2circle vconone)
 target_link_libraries(tflite2circle nncc_coverage)
 
index e39f9eeafc1eeced4a2b8f68b331a39a8b427ea7..3db9a2f2a93b9cb6270aaafd000da8015e891a0a 100644 (file)
@@ -1,6 +1,6 @@
 require("arser")
 require("foder")
-require("mio-tflite260")
-require("mio-circle")
+require("mio-tflite280")
+require("mio-circle04")
 require("safemain")
 require("vconone")
index dc6ff086c8d35e41c953f070ae2998bcfa9acdd4..88a4f71dfa68a494c1ee441454b16ec707838078 100644 (file)
 #include "BuildBuiltinOptions/SqueezeOptions.h"
 #include "BuildBuiltinOptions/StridedSliceOptions.h"
 #include "BuildBuiltinOptions/SubOptions.h"
+#include "BuildBuiltinOptions/SVDFOptions.h"
 #include "BuildBuiltinOptions/TileOptions.h"
 #include "BuildBuiltinOptions/TopKV2Options.h"
 #include "BuildBuiltinOptions/TransposeOptions.h"
index 2619b73eb3c3275a89e3efe47b8742e8a49b45d5..27410012d385b9139f7fce153d07ec985831fc2f 100644 (file)
@@ -37,6 +37,7 @@ build_circle_FullyConnectedOptions(flatbuffers::FlatBufferBuilder &fb, const tfl
   else if (tflite_weight_format == tflite::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8)
     builtin_options_builder.add_weights_format(
       circle::FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8);
+  builtin_options_builder.add_keep_num_dims(tflite_builtin_options->keep_num_dims());
   return builtin_options_builder.Finish();
 }
 
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.cpp
new file mode 100644 (file)
index 0000000..e23738a
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SVDFOptions.h"
+#include "DataLookup.h"
+
+#include <cassert>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SVDFOptions>
+build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op)
+{
+  auto *tflite_builtin_options = op->builtin_options_as_SVDFOptions();
+  assert(tflite_builtin_options);
+
+  circle::SVDFOptionsBuilder builtin_options_builder{fb};
+  builtin_options_builder.add_rank(tflite_builtin_options->rank());
+  builtin_options_builder.add_asymmetric_quantize_inputs(
+    tflite_builtin_options->asymmetric_quantize_inputs());
+  builtin_options_builder.add_fused_activation_function(
+    get_circle_activation_function_type(tflite_builtin_options->fused_activation_function()));
+
+  return builtin_options_builder.Finish();
+}
+
+} // namespace tflite2circle
diff --git a/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h b/compiler/tflite2circle/src/BuildBuiltinOptions/SVDFOptions.h
new file mode 100644 (file)
index 0000000..2ddbd39
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __BBO_SVDF_OPTIONS_H__
+#define __BBO_SVDF_OPTIONS_H__
+
+#include <mio/tflite/schema_generated.h>
+#include <mio/circle/schema_generated.h>
+
+namespace tflite2circle
+{
+
+flatbuffers::Offset<circle::SVDFOptions>
+build_circle_SVDFOptions(flatbuffers::FlatBufferBuilder &fb, const tflite::Operator *op);
+
+} // namespace tflite2circle
+
+#endif // __BBO_SVDF_OPTIONS_H__
index 90cc415ffbe72cefec10a77d549fc18d6ae34bc3..d483b288f4c360a7e8cbf2695699eb43647d78a6 100644 (file)
 
 #include <cassert>
 #include <iostream>
+#include <map>
 #include <memory>
 
 #include "CircleModel.h"
 #include "DataLookup.h"
 
+#include <mio_tflite280/Helper.h>
+
 namespace tflite2circle
 {
 
@@ -206,7 +209,8 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
     auto tflite_inputs = it_sg->inputs();
     std::vector<int32_t> input_vec{tflite_inputs->begin(), tflite_inputs->end()};
 
-    // apply signature_def to input tensor index so that input orders are correct
+    // apply signature_def to input tensor index so that input orders follow like tensorflow lite
+    // interpreter._get_full_signature_list() method, which is ordered(sorted) in name
     // NOTE we do not need this when circle format supports signature_def
     if (_tfl_signature_def_offsets != nullptr)
     {
@@ -216,10 +220,16 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
         {
           auto inputs = it_signdef->inputs();
           assert(inputs->size() == input_vec.size());
-          uint32_t input_vec_idx = 0;
+
+          std::map<std::string, uint32_t> map_name_index;
           for (auto it_tm : *inputs)
           {
-            input_vec[input_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+            map_name_index[it_tm->name()->str()] = it_tm->tensor_index();
+          }
+          uint32_t input_vec_idx = 0;
+          for (auto &item : map_name_index)
+          {
+            input_vec[input_vec_idx++] = item.second;
           }
         }
       }
@@ -240,10 +250,16 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
         {
           auto outputs = it_signdef->outputs();
           assert(outputs->size() == output_vec.size());
-          uint32_t output_vec_idx = 0;
+
+          std::map<std::string, uint32_t> map_name_index;
           for (auto it_tm : *outputs)
           {
-            output_vec[output_vec_idx++] = static_cast<int32_t>(it_tm->tensor_index());
+            map_name_index[it_tm->name()->str()] = it_tm->tensor_index();
+          }
+          uint32_t output_vec_idx = 0;
+          for (auto &item : map_name_index)
+          {
+            output_vec[output_vec_idx++] = item.second;
           }
         }
       }
@@ -318,17 +334,6 @@ template <> void Offset<SubGraphLink>::build(const TFLFlatBufVec *tflite_flatbuf
   _circle_flatbuffer_vec_offset = _fb->CreateVector(subgprahs_vec);
 }
 
-tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
-{
-  assert(opcode != nullptr);
-  int8_t dp_code = opcode->deprecated_builtin_code();
-  // 127 is max of int8_t which is upper bound of v3 builtin_code
-  // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
-  if (dp_code < 127 && dp_code >= 0)
-    return tflite::BuiltinOperator(dp_code);
-  return opcode->builtin_code();
-}
-
 template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
@@ -337,8 +342,9 @@ template <> void Offset<OperatorCodeLink>::build(const TFLFlatBufVec *tflite_fla
   {
     auto custom_code = _fb->CreateString(it->custom_code());
     circle::OperatorCodeBuilder operator_code_builder{*_fb};
-    // TODO support circle deprecated_builtin_code
-    auto bt_code = builtin_code_neutral(it);
+    auto de_code = it->deprecated_builtin_code();
+    auto bt_code = it->builtin_code();
+    operator_code_builder.add_deprecated_builtin_code(get_circle_builtin_code(de_code));
     operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
     operator_code_builder.add_custom_code(custom_code);
     operator_code_builder.add_version(it->version());
index c5ed62e31454d5af99f1f34dd9b98f7312a9a594..7c3aab089c6ae26560869eb53e9de6f36cb8c023 100644 (file)
@@ -34,6 +34,22 @@ circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop)
   }
 }
 
+int8_t get_circle_builtin_code(int8_t tfl_bop_i8)
+{
+  tflite::BuiltinOperator tfl_bop = static_cast<tflite::BuiltinOperator>(tfl_bop_i8);
+
+  switch (tfl_bop)
+  {
+#define TFL_OPERATOR(OP)             \
+  case tflite::BuiltinOperator_##OP: \
+    return static_cast<int8_t>(circle::BuiltinOperator_##OP);
+#include "TFLOperator.lst"
+#undef TFL_OPERATOR
+    default:
+      throw std::runtime_error("tflite2circle: wrong op");
+  }
+}
+
 circle::TensorType get_circle_tensortype(tflite::TensorType tfl_tt)
 {
   switch (tfl_tt)
index 601d014ddad6c96f111eb500758a4750116abc70..5aeeb6eca30aa41993b5d3aef922149866ec83d1 100644 (file)
@@ -30,6 +30,8 @@ namespace tflite2circle
  */
 circle::BuiltinOperator get_circle_builtin_code(tflite::BuiltinOperator tfl_bop);
 
+int8_t get_circle_builtin_code(int8_t tfl_bop_i8);
+
 /**
  * @brief Returns circle TensorType according to tflite.
  *
index f2de7e046bc9ca435b4d78515ae8952edfd97daf..d55ba464ad54ff9356623ca9e28bb6af09e090ab 100644 (file)
@@ -9,7 +9,7 @@ TFL_BUILTIN_OPTIONS(DepthwiseConv2DOptions)
 //TFL_BUILTIN_OPTIONS(ConcatEmbeddingsOptions)
 //TFL_BUILTIN_OPTIONS(LSHProjectionOptions)
 TFL_BUILTIN_OPTIONS(Pool2DOptions)
-//TFL_BUILTIN_OPTIONS(SVDFOptions)
+TFL_BUILTIN_OPTIONS(SVDFOptions)
 //TFL_BUILTIN_OPTIONS(RNNOptions)
 TFL_BUILTIN_OPTIONS(FullyConnectedOptions)
 TFL_BUILTIN_OPTIONS(SoftmaxOptions)
index 2241c9ec976c91d492e1a94838b5b7744f6bf324..3841a1b78dc444ee812b4fd4669a82457481641d 100644 (file)
@@ -1,5 +1,5 @@
 if (NOT VCONONE_VERSION)
-  set(VCONONE_VERSION 0x0000000000130001)
+  set(VCONONE_VERSION 0x0000000000140001)
   # NOTE order is [build patch minor major]
   # if VCONONE_VERSION is set with -D option, it will be cached
   # you may have to remove cache file if you remove -D option
index ff4070f98d03f0d6d47b0df89b34a1666e0ad142..84197e6d6460e3be90170db43a013cbbfd86c180 100644 (file)
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
 author = 'Samsung Research & contributors'
 
 # The full version, including alpha/beta/rc tags
-release = '1.19.0'
+release = '1.20.0'
 
 # -- General configuration ---------------------------------------------------
 
index 75699890a3bc678804d9a92e56453ee3bfb21c0d..29e300bdda1b22118e3e9055660f590e170dfcb3 100644 (file)
@@ -27,7 +27,7 @@ Here is a summary of it
 ```
 $ sudo apt-get install \
 build-essential \
-clang-format-3.9 \
+clang-format-8 \
 cmake \
 doxygen \
 git \
@@ -122,3 +122,73 @@ $ NNAS_BUILD_PREFIX=build ./nnas create-package --preset 20200731_windows --pref
 - `NNAS_BUILD_PREFIX` is the path to directory where compiler-build-artifacts will be stored.
 - `--preset` is the one that specifies a version you will install. You can see `infra/packaging/preset/` directory for more details and getting latest version.
 - `--prefix` is the install directory.
+
+## Cross build for Ubuntu/ARM32 (experimental)
+
+Some modules are available to run in Ubuntu/ARM32 through cross building.
+
+While configuring the build, some modules need to execute tools for generating
+test materials and they need to execute in the host(x86-64). So some modules
+are needed to build the tools for host before cross building.
+
+Cross build overall steps are like, (1) configure for host
+(2) build tools for host (3) configure for ARM32 target (4) and then build
+for ARM32 target.
+
+Unit tests can also run in target device.
+But the value tests need to run TensorFlow Lite to obtain the expected
+results, and doing this on the target would be a substantial task, so the
+data files produced by the host execution are used instead.
+
+Thus to run the unit tests in the target, running in host is needed in prior.
+
+### Prepare root file system
+
+You should prepare Ubuntu/ARM32 root file system for cross compilation.
+Please refer to
+[how-to-cross-build-runtime-for-arm.md](how-to-cross-build-runtime-for-arm.md)
+for preparation.
+
+You can set `ROOTFS_ARM` environment variable if you have in alternative
+folder.
+
+### Clean existing external source for patches
+
+Some external projects from source are not "cross compile ready with CMake"
+projects. This experimental project prepared some patches for this.
+Just remove the source and stamp file like below and the `make` will prepare
+patch applied source codes.
+```
+rm -rf externals/HDF5
+rm -rf externals/PROTOBUF
+rm externals/HDF5.stamp
+rm externals/PROTOBUF.stamp
+```
+
+### Build
+
+To cross build, `infra/nncc/Makefile.arm32` file is provided as an example to
+work with `make` command.
+```
+make -f infra/nncc/Makefile.arm32 cfg
+make -f infra/nncc/Makefile.arm32 debug
+```
+The first `make` runs the above steps (1), (2) and (3); the second `make` runs step (4).
+
+### Test
+
+You can also run unit tests in ARM32 Ubuntu device with cross build results.
+First you need to run the tests on the host to prepare data files whose
+generation on the target device is currently complicated.
+```
+# run this in x86-64 host
+make -f infra/nncc/Makefile.arm32 test_prep
+
+# run this in ARM32 target device
+make -f infra/nncc/Makefile.arm32 test
+```
+
+NOTE: this assumes
+- the host and target have the same directory structure
+- you should copy the `build` folder to the target, or
+- simply mount the `ONE` folder on the target with NFS
index 02ab47537baf08f309db100bbe100a6650eccae6..bf524d766cbeca60525ba984a2b0680fb6b91eca 100644 (file)
@@ -15,14 +15,14 @@ In the Ubuntu, you can easily install it with the following command.
 $ sudo apt-get install cmake libboost-all-dev
 ```
 
-If your linux system does not have the basic development configuration, you will need to install more packages. A list of all packages needed to configure the development environment can be found in the https://github.com/Samsung/ONE/blob/master/infra/docker/Dockerfile.1804 file.
+If your linux system does not have the basic development configuration, you will need to install more packages. A list of all packages needed to configure the development environment can be found in https://github.com/Samsung/ONE/blob/master/infra/docker/bionic/Dockerfile.
 
 Here is a summary of it
 
 ```
 $ sudo apt install \
 build-essential \
-clang-format-3.9 \
+clang-format-8 \
 cmake \
 doxygen \
 git \
index 4f2027b4be788c1f30d69b605e87cad596490c13..557e6f47de36dc8cae7d89861ae5febfe3c30d87 100644 (file)
@@ -14,7 +14,6 @@ function(ExternalBuild_CMake)
                         ${ARGN}
   )
 
-  set(BUILD_STAMP_PATH "${ARG_BUILD_DIR}/${ARG_PKG_NAME}.stamp")
   set(BUILD_LOG_PATH "${ARG_BUILD_DIR}/${ARG_PKG_NAME}.log")
   set(INSTALL_STAMP_PATH "${ARG_INSTALL_DIR}/${ARG_PKG_NAME}.stamp")
   set(INSTALL_LOG_PATH "${ARG_INSTALL_DIR}/${ARG_PKG_NAME}.log")
@@ -24,14 +23,6 @@ function(ExternalBuild_CMake)
     set(PKG_IDENTIFIER "${ARG_IDENTIFIER}")
   endif(DEFINED ARG_IDENTIFIER)
 
-  # NOTE Do NOT retry build once it fails
-  if(EXISTS ${BUILD_STAMP_PATH})
-    file(READ ${BUILD_STAMP_PATH} READ_IDENTIFIER)
-    if("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
-      return()
-    endif("${READ_IDENTIFIER}" STREQUAL "${PKG_IDENTIFIER}")
-  endif(EXISTS ${BUILD_STAMP_PATH})
-
   # NOTE Do NOT build pre-installed exists
   if(EXISTS ${INSTALL_STAMP_PATH})
     file(READ ${INSTALL_STAMP_PATH} READ_IDENTIFIER)
@@ -42,11 +33,23 @@ function(ExternalBuild_CMake)
 
   message(STATUS "Build ${ARG_PKG_NAME} from ${ARG_CMAKE_DIR}")
 
+  # if we're doing the cross compilation, external project also needs it
+  if(CMAKE_TOOLCHAIN_FILE)
+    set(TOOLCHAIN_FILE ${CMAKE_TOOLCHAIN_FILE})
+    # NOTE CMAKE_TOOLCHAIN_FILE maybe relative path -> make abs folder
+    if(NOT EXISTS ${TOOLCHAIN_FILE})
+      set(TOOLCHAIN_FILE ${CMAKE_SOURCE_DIR}/${CMAKE_TOOLCHAIN_FILE})
+      if(NOT EXISTS ${TOOLCHAIN_FILE})
+        message(FATAL_ERROR "Failed to find ${CMAKE_TOOLCHAIN_FILE}")
+      endif()
+    endif()
+    message(STATUS "ExternalBuild_CMake TOOLCHAIN_FILE=${TOOLCHAIN_FILE}")
+    list(APPEND ARG_EXTRA_OPTS -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE})
+  endif(CMAKE_TOOLCHAIN_FILE)
+
   file(MAKE_DIRECTORY ${ARG_BUILD_DIR})
   file(MAKE_DIRECTORY ${ARG_INSTALL_DIR})
 
-  file(WRITE "${BUILD_STAMP_PATH}" "${PKG_IDENTIFIER}")
-
   execute_process(COMMAND ${CMAKE_COMMAND}
                             -G "${CMAKE_GENERATOR}"
                             -DCMAKE_INSTALL_PREFIX=${ARG_INSTALL_DIR}
index c8ca57520b11388b54a82a0007181a2c0dbc9b1d..f71eb5d116af5c600ef9c2736c8057b4bc5ea627 100644 (file)
@@ -5,7 +5,7 @@ function(ExternalSource_Download PREFIX)
   include(CMakeParseArguments)
   nnas_include(StampTools)
 
-  cmake_parse_arguments(ARG "" "DIRNAME;URL;CHECKSUM" "" ${ARGN})
+  cmake_parse_arguments(ARG "" "DIRNAME;URL;CHECKSUM;PATCH" "" ${ARGN})
 
   # Configure URL
   if(ARG_URL)
@@ -104,11 +104,12 @@ function(ExternalSource_Download PREFIX)
     message(STATUS "Extract ${PREFIX}")
     execute_process(COMMAND ${CMAKE_COMMAND} -E tar xfz "${DOWNLOAD_PATH}"
                     WORKING_DIRECTORY "${TMP_DIR}"
+                    RESULT_VARIABLE EXTRACTION_RESULT
                     ERROR_VARIABLE EXTRACTION_ERROR)
 
-    if(EXTRACTION_ERROR)
-      message(FATAL_ERROR "Extract ${PREFIX} - failed")
-    endif(EXTRACTION_ERROR)
+    if(EXTRACTION_RESULT AND NOT EXTRACTION_RESULT EQUAL 0)
+      message(FATAL_ERROR "Extract ${PREFIX} - failed: ${EXTRACTION_ERROR}")
+    endif()
 
     file(REMOVE "${DOWNLOAD_PATH}")
     message(STATUS "Extract ${PREFIX} - done")
@@ -123,6 +124,19 @@ function(ExternalSource_Download PREFIX)
     get_filename_component(contents ${contents} ABSOLUTE)
 
     file(RENAME ${contents} "${OUT_DIR}")
+    if(ARG_PATCH)
+      message(STATUS "Patch with ${ARG_PATCH}")
+      execute_process(COMMAND patch -p1 -i ${ARG_PATCH}
+                      WORKING_DIRECTORY ${OUT_DIR}
+                      RESULT_VARIABLE EXEC_RESULT
+                      ERROR_VARIABLE EXEC_ERROR)
+      if(NOT EXEC_RESULT EQUAL 0)
+        message(FATAL_ERROR "${PREFIX} failed patch ${ARG_PATCH}")
+      endif(NOT EXEC_RESULT EQUAL 0)
+
+      message(STATUS "patch ${ARG_PATCH}: ${EXEC_RESULT}, ${EXEC_ERROR}")
+    endif(ARG_PATCH)
+
     file(REMOVE_RECURSE "${TMP_DIR}")
     file(WRITE "${STAMP_PATH}" "${URL}")
     message(STATUS "Cleanup ${PREFIX} - done")
index cf56dd0861d3ae0c345ea042008c75061b6ff8a1..6616283fb8c551cf9751d89719706295d6343a5b 100644 (file)
@@ -37,6 +37,8 @@ if("${HOST_ARCH}" STREQUAL "x86_64")
   set(HOST_ARCH_BASE ${HOST_ARCH})
 elseif("${HOST_ARCH}" STREQUAL "armv7l")
   set(HOST_ARCH_BASE "arm")
+elseif("${HOST_ARCH}" STREQUAL "armv7hl")
+  set(HOST_ARCH_BASE "arm")
 elseif("${HOST_ARCH}" STREQUAL "aarch64")
   set(HOST_ARCH_BASE "aarch64")
 elseif("${HOST_ARCH}" STREQUAL "i686")
@@ -49,6 +51,8 @@ if("${TARGET_ARCH}" STREQUAL "x86_64")
   set(TARGET_ARCH_BASE ${TARGET_ARCH})
 elseif("${TARGET_ARCH}" STREQUAL "armv7l")
   set(TARGET_ARCH_BASE "arm")
+elseif("${TARGET_ARCH}" STREQUAL "armv7hl")
+  set(TARGET_ARCH_BASE "arm")
 elseif("${TARGET_ARCH}" STREQUAL "aarch64")
   set(TARGET_ARCH_BASE "aarch64")
 elseif("${TARGET_ARCH}" STREQUAL "i686")
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
deleted file mode 100644 (file)
index 8b0a602..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-function(_FlatBuffers_import)
-  find_package(Flatbuffers QUIET)
-  set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
-endfunction(_FlatBuffers_import)
-
-function(_FlatBuffers_build)
-  if(NOT BUILD_FLATBUFFERS)
-    message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
-    return()
-  endif(NOT BUILD_FLATBUFFERS)
-
-  nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
-
-  if(NOT FlatBuffersSource_FOUND)
-    # Source is not available
-    message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
-    return()
-  endif(NOT FlatBuffersSource_FOUND)
-
-  set(ADDITIONAL_CXX_FLAGS "")
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
-    set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
-  endif()
-
-  nnas_include(ExternalBuildTools)
-  ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
-                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
-                      INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
-                      BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.10-fix6"
-                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
-                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
-                      PKG_NAME    "FLATBUFFERS-1.10")
-
-endfunction(_FlatBuffers_build)
-
-_FlatBuffers_build()
-_FlatBuffers_import()
-
-if(FlatBuffers_FOUND)
-  if(NOT TARGET flatbuffers-1.10)
-    add_library(flatbuffers-1.10 INTERFACE)
-    target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
-    message(STATUS "Found FlatBuffers-1.10: TRUE")
-  endif(NOT TARGET flatbuffers-1.10)
-
-  function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
-    get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
-    get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
-
-    foreach(schema ${ARGN})
-      get_filename_component(schema_fn "${schema}" NAME)
-      get_filename_component(dir "${schema}" DIRECTORY)
-
-      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
-      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
-      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
-    endforeach()
-
-    add_custom_command(OUTPUT ${OUTPUT_FILES}
-                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
-                       --no-union-value-namespacing
-                       --gen-object-api -o "${abs_output_dir}"
-                       ${SCHEMA_FILES}
-                       DEPENDS flatbuffers::flatc)
-
-    set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
-    set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
-  endfunction(FlatBuffers_Generate)
-
-  function(FlatBuffers_Target TGT)
-    set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
-    set(multiValueArgs SCHEMA_FILES)
-    cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
-    # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
-    if(NOT ARG_INCLUDE_DIR)
-      set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
-    endif(NOT ARG_INCLUDE_DIR)
-
-    get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
-    get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
-    get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
-
-    # Let's reset list variables before using them
-    # NOTE THIS DOES NOT AFFECT parent scope
-    unset(SCHEMA_FILES)
-    unset(OUTPUT_FILES)
-
-    foreach(schema ${ARG_SCHEMA_FILES})
-      get_filename_component(schema_fn "${schema}" NAME)
-      get_filename_component(dir "${schema}" DIRECTORY)
-
-      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
-      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
-      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
-    endforeach()
-
-    # Generate headers
-    add_custom_command(OUTPUT ${OUTPUT_FILES}
-                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
-                               --no-union-value-namespacing
-                               --gen-object-api -o "${abs_output_dir}"
-                               ${SCHEMA_FILES}
-                       DEPENDS ${SCHEMA_FILES}
-                       COMMENT "Generate '${TGT}' headers")
-
-    # NOTE This header-only library is deliberately declared as STATIC library
-    #      to avoid possible scope issues related with generated files
-    add_library(${TGT} STATIC ${OUTPUT_FILES})
-    set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
-    target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
-    target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
-  endfunction(FlatBuffers_Target)
-endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake
deleted file mode 100644 (file)
index 6585f21..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.10")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-  set(PACKAGE_VERSION_EXACT TRUE)
-  set(PACKAGE_VERSION_COMPATIBLE TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
deleted file mode 100644 (file)
index 06366db..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-function(_FlatBuffers_import)
-  find_package(Flatbuffers QUIET)
-  set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
-endfunction(_FlatBuffers_import)
-
-function(_FlatBuffers_build)
-  if(NOT BUILD_FLATBUFFERS)
-    message(STATUS "FlatBuffersConfig !BUILD_FLATBUFFERS")
-    return()
-  endif(NOT BUILD_FLATBUFFERS)
-
-  nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET)
-
-  if(NOT FlatBuffersSource_FOUND)
-    # Source is not available
-    message(STATUS "FlatBuffersConfig !FlatBuffersSource_FOUND")
-    return()
-  endif(NOT FlatBuffersSource_FOUND)
-
-  set(ADDITIONAL_CXX_FLAGS "")
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
-    set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
-  endif()
-
-  nnas_include(ExternalBuildTools)
-  ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
-                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build
-                      INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12
-                      BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.12-fix3"
-                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
-                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
-                      PKG_NAME    "FLATBUFFERS-1.12")
-
-endfunction(_FlatBuffers_build)
-
-_FlatBuffers_build()
-_FlatBuffers_import()
-
-if(FlatBuffers_FOUND)
-  if(NOT TARGET flatbuffers-1.12)
-    add_library(flatbuffers-1.12 INTERFACE)
-    target_link_libraries(flatbuffers-1.12 INTERFACE flatbuffers::flatbuffers)
-    message(STATUS "Found FlatBuffers-1.12: TRUE")
-  endif(NOT TARGET flatbuffers-1.12)
-
-  function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
-    get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
-    get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
-
-    foreach(schema ${ARGN})
-      get_filename_component(schema_fn "${schema}" NAME)
-      get_filename_component(dir "${schema}" DIRECTORY)
-
-      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
-      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
-      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
-    endforeach()
-
-    add_custom_command(OUTPUT ${OUTPUT_FILES}
-                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
-                       --no-union-value-namespacing
-                       --gen-object-api -o "${abs_output_dir}"
-                       ${SCHEMA_FILES}
-                       DEPENDS flatbuffers::flatc)
-
-    set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
-    set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
-  endfunction(FlatBuffers_Generate)
-
-  function(FlatBuffers_Target TGT)
-    set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
-    set(multiValueArgs SCHEMA_FILES)
-    cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
-    # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
-    if(NOT ARG_INCLUDE_DIR)
-      set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
-    endif(NOT ARG_INCLUDE_DIR)
-
-    get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
-    get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
-    get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
-
-    # Let's reset list variables before using them
-    # NOTE THIS DOES NOT AFFECT parent scope
-    unset(SCHEMA_FILES)
-    unset(OUTPUT_FILES)
-
-    foreach(schema ${ARG_SCHEMA_FILES})
-      get_filename_component(schema_fn "${schema}" NAME)
-      get_filename_component(dir "${schema}" DIRECTORY)
-
-      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
-      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
-      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
-    endforeach()
-
-    # Generate headers
-    add_custom_command(OUTPUT ${OUTPUT_FILES}
-                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
-                               --no-union-value-namespacing
-                               --gen-object-api -o "${abs_output_dir}"
-                               ${SCHEMA_FILES}
-                       DEPENDS ${SCHEMA_FILES}
-                       COMMENT "Generate '${TGT}' headers")
-
-    # NOTE This header-only library is deliberately declared as STATIC library
-    #      to avoid possible scope issues related with generated files
-    add_library(${TGT} STATIC ${OUTPUT_FILES})
-    set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
-    target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
-    target_link_libraries(${TGT} PUBLIC flatbuffers-1.12)
-  endfunction(FlatBuffers_Target)
-endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake
deleted file mode 100644 (file)
index 8cfdbf8..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.12")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-  set(PACKAGE_VERSION_EXACT TRUE)
-  set(PACKAGE_VERSION_COMPATIBLE TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfig.cmake
new file mode 100644 (file)
index 0000000..b7ae666
--- /dev/null
@@ -0,0 +1,132 @@
+# TODO Remove other Flatbuffers versions
+function(_FlatBuffers_import)
+  find_package(Flatbuffers 2.0 QUIET)
+  set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
+endfunction(_FlatBuffers_import)
+
+function(_FlatBuffers_build)
+  if(NOT BUILD_FLATBUFFERS)
+    message(STATUS "FlatBuffersConfig !BUILD_FLATBUFFERS")
+    return()
+  endif(NOT BUILD_FLATBUFFERS)
+
+  nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
+
+  if(NOT FlatBuffersSource_FOUND)
+    # Source is not available
+    message(STATUS "FlatBuffersConfig !FlatBuffersSource_FOUND")
+    return()
+  endif(NOT FlatBuffersSource_FOUND)
+
+  set(ADDITIONAL_CXX_FLAGS "")
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
+    set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
+  endif()
+
+  nnas_include(ExternalBuildTools)
+  ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
+                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-2.0/build
+                      INSTALL_DIR ${EXT_OVERLAY_DIR}
+                      BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
+                      IDENTIFIER  "2.0"
+                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
+                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
+                      PKG_NAME    "FLATBUFFERS-2.0")
+
+endfunction(_FlatBuffers_build)
+
+_FlatBuffers_build()
+_FlatBuffers_import()
+
+# for cross compilation BUILD_HOST_EXEC should be set for host flatc executable
+# flatc should exist as ${BUILD_HOST_EXEC}/overlay/bin/flatc.
+# and then if EXTERNAL_FLATC is set then use ${EXTERNAL_FLATC} file.
+set(FLATC_PATH "$<TARGET_FILE:flatbuffers::flatc>")
+
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  set(FLATC_PATH $ENV{BUILD_HOST_EXEC}/overlay/bin/flatc)
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+if(DEFINED ENV{EXTERNAL_FLATC})
+  set(FLATC_PATH $ENV{EXTERNAL_FLATC})
+endif(DEFINED ENV{EXTERNAL_FLATC})
+
+if(FlatBuffers_FOUND)
+  if(NOT TARGET flatbuffers-2.0)
+    add_library(flatbuffers-2.0 INTERFACE)
+    target_link_libraries(flatbuffers-2.0 INTERFACE flatbuffers::flatbuffers)
+    message(STATUS "Found flatbuffers-2.0: TRUE")
+  endif(NOT TARGET flatbuffers-2.0)
+
+  function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
+    get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
+    get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
+
+    foreach(schema ${ARGN})
+      get_filename_component(schema_fn "${schema}" NAME)
+      get_filename_component(dir "${schema}" DIRECTORY)
+
+      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+    endforeach()
+
+    add_custom_command(OUTPUT ${OUTPUT_FILES}
+                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+                       COMMAND "${FLATC_PATH}" -c --no-includes
+                       --no-union-value-namespacing
+                       --gen-object-api -o "${abs_output_dir}"
+                       ${SCHEMA_FILES}
+                       DEPENDS flatbuffers::flatc)
+
+    set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
+    set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
+  endfunction(FlatBuffers_Generate)
+
+  function(FlatBuffers_Target TGT)
+    set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
+    set(multiValueArgs SCHEMA_FILES)
+    cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
+    if(NOT ARG_INCLUDE_DIR)
+      set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
+    endif(NOT ARG_INCLUDE_DIR)
+
+    get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
+    get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
+    get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
+
+    # Let's reset list variables before using them
+    # NOTE THIS DOES NOT AFFECT parent scope
+    unset(SCHEMA_FILES)
+    unset(OUTPUT_FILES)
+
+    foreach(schema ${ARG_SCHEMA_FILES})
+      get_filename_component(schema_fn "${schema}" NAME)
+      get_filename_component(dir "${schema}" DIRECTORY)
+
+      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+    endforeach()
+
+    # Generate headers
+    add_custom_command(OUTPUT ${OUTPUT_FILES}
+                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+                       COMMAND "${FLATC_PATH}" -c --no-includes
+                               --no-union-value-namespacing
+                               --gen-object-api -o "${abs_output_dir}"
+                               ${SCHEMA_FILES}
+                       DEPENDS ${SCHEMA_FILES}
+                       COMMENT "Generate '${TGT}' headers")
+
+    # NOTE This header-only library is deliberately declared as STATIC library
+    #      to avoid possible scope issues related with generated files
+    add_library(${TGT} STATIC ${OUTPUT_FILES})
+    set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+    target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
+    target_link_libraries(${TGT} PUBLIC flatbuffers-2.0)
+  endfunction(FlatBuffers_Target)
+endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-2.0/FlatBuffersConfigVersion.cmake
new file mode 100644 (file)
index 0000000..e4a87a7
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffersConfig.cmake
deleted file mode 100644 (file)
index 8b0a602..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-function(_FlatBuffers_import)
-  find_package(Flatbuffers QUIET)
-  set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
-endfunction(_FlatBuffers_import)
-
-function(_FlatBuffers_build)
-  if(NOT BUILD_FLATBUFFERS)
-    message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
-    return()
-  endif(NOT BUILD_FLATBUFFERS)
-
-  nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
-
-  if(NOT FlatBuffersSource_FOUND)
-    # Source is not available
-    message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
-    return()
-  endif(NOT FlatBuffersSource_FOUND)
-
-  set(ADDITIONAL_CXX_FLAGS "")
-  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
-    set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
-  endif()
-
-  nnas_include(ExternalBuildTools)
-  ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
-                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
-                      INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
-                      BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.10-fix6"
-                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
-                                  "-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON"
-                      PKG_NAME    "FLATBUFFERS-1.10")
-
-endfunction(_FlatBuffers_build)
-
-_FlatBuffers_build()
-_FlatBuffers_import()
-
-if(FlatBuffers_FOUND)
-  if(NOT TARGET flatbuffers-1.10)
-    add_library(flatbuffers-1.10 INTERFACE)
-    target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
-    message(STATUS "Found FlatBuffers-1.10: TRUE")
-  endif(NOT TARGET flatbuffers-1.10)
-
-  function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
-    get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
-    get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
-
-    foreach(schema ${ARGN})
-      get_filename_component(schema_fn "${schema}" NAME)
-      get_filename_component(dir "${schema}" DIRECTORY)
-
-      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
-      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
-      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
-    endforeach()
-
-    add_custom_command(OUTPUT ${OUTPUT_FILES}
-                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
-                       --no-union-value-namespacing
-                       --gen-object-api -o "${abs_output_dir}"
-                       ${SCHEMA_FILES}
-                       DEPENDS flatbuffers::flatc)
-
-    set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
-    set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
-  endfunction(FlatBuffers_Generate)
-
-  function(FlatBuffers_Target TGT)
-    set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
-    set(multiValueArgs SCHEMA_FILES)
-    cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-
-    # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
-    if(NOT ARG_INCLUDE_DIR)
-      set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
-    endif(NOT ARG_INCLUDE_DIR)
-
-    get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
-    get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
-    get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
-
-    # Let's reset list variables before using them
-    # NOTE THIS DOES NOT AFFECT parent scope
-    unset(SCHEMA_FILES)
-    unset(OUTPUT_FILES)
-
-    foreach(schema ${ARG_SCHEMA_FILES})
-      get_filename_component(schema_fn "${schema}" NAME)
-      get_filename_component(dir "${schema}" DIRECTORY)
-
-      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
-
-      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
-      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
-    endforeach()
-
-    # Generate headers
-    add_custom_command(OUTPUT ${OUTPUT_FILES}
-                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
-                               --no-union-value-namespacing
-                               --gen-object-api -o "${abs_output_dir}"
-                               ${SCHEMA_FILES}
-                       DEPENDS ${SCHEMA_FILES}
-                       COMMENT "Generate '${TGT}' headers")
-
-    # NOTE This header-only library is deliberately declared as STATIC library
-    #      to avoid possible scope issues related with generated files
-    add_library(${TGT} STATIC ${OUTPUT_FILES})
-    set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
-    target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
-    target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
-  endfunction(FlatBuffers_Target)
-endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfig.cmake
deleted file mode 100644 (file)
index 09a922b..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-function(_FlatBuffersSource_import)
-  if(NOT DOWNLOAD_FLATBUFFERS)
-    set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
-    return()
-  endif(NOT DOWNLOAD_FLATBUFFERS)
-
-  nnas_include(ExternalSourceTools)
-  nnas_include(OptionTools)
-
-  envoption(FLATBUFFERS_1_10_URL https://github.com/google/flatbuffers/archive/v1.10.0.tar.gz)
-  ExternalSource_Download(FLATBUFFERS
-    DIRNAME FLATBUFFERS-1.10
-    CHECKSUM MD5=f7d19a3f021d93422b0bc287d7148cd2
-    URL ${FLATBUFFERS_1_10_URL}
-  )
-
-  set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
-  set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-1.10/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644 (file)
index 6585f21..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.10")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-  set(PACKAGE_VERSION_EXACT TRUE)
-  set(PACKAGE_VERSION_COMPATIBLE TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfig.cmake
deleted file mode 100644 (file)
index 9ee2c49..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-function(_FlatBuffersSource_import)
-  if(NOT DOWNLOAD_FLATBUFFERS)
-    set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
-    return()
-  endif(NOT DOWNLOAD_FLATBUFFERS)
-
-  nnas_include(ExternalSourceTools)
-  nnas_include(OptionTools)
-
-  envoption(FLATBUFFERS_1_12_URL https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz)
-  ExternalSource_Download(FLATBUFFERS
-    DIRNAME FLATBUFFERS-1.12
-    CHECKSUM MD5=c62ffefb3d4548b127cca14ce047f16c
-    URL ${FLATBUFFERS_1_12_URL}
-  )
-
-  set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
-  set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-1.12/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644 (file)
index 8cfdbf8..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-set(PACKAGE_VERSION "1.12")
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
-  set(PACKAGE_VERSION_EXACT TRUE)
-  set(PACKAGE_VERSION_COMPATIBLE TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfig.cmake
new file mode 100644 (file)
index 0000000..a0a32aa
--- /dev/null
@@ -0,0 +1,21 @@
+function(_FlatBuffersSource_import)
+  if(NOT DOWNLOAD_FLATBUFFERS)
+    set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_FLATBUFFERS)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  envoption(FLATBUFFERS_2_0_URL https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz)
+  ExternalSource_Download(FLATBUFFERS
+    DIRNAME FLATBUFFERS-2.0
+    CHECKSUM MD5=a27992324c3cbf86dd888268a23d17bd
+    URL ${FLATBUFFERS_2_0_URL}
+  )
+
+  set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
+  set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_FlatBuffersSource_import)
+
+_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSource-2.0/FlatBuffersSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..e4a87a7
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffersSourceConfig.cmake b/infra/cmake/packages/FlatBuffersSourceConfig.cmake
deleted file mode 100644 (file)
index 52bce6d..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-function(_FlatBuffersSource_import)
-  if(NOT DOWNLOAD_FLATBUFFERS)
-    set(FlatBuffersSource_FOUND FALSE PARENT_SCOPE)
-    return()
-  endif(NOT DOWNLOAD_FLATBUFFERS)
-
-  nnas_include(ExternalSourceTools)
-  nnas_include(OptionTools)
-
-  # Each TensorFlow needs a specific version of Flatbuffers
-  # - TensorFlow 1.7 downloads it from https://github.com/google/flatbuffers/archive/971a68110e4.tar.gz
-  # - TensorFlow 1.12 downloads it from https://github.com/google/flatbuffers/archive/1f5eae5d6a1.tar.gz
-  #
-  # Let's use 1.10 released in 2018.10 (compatible with 1f5eae5d6a1).
-  #
-  # TODO Manage multiple versions
-  envoption(FLATBUFFERS_URL https://github.com/google/flatbuffers/archive/v1.10.0.tar.gz)
-  ExternalSource_Download(FLATBUFFERS
-    DIRNAME FLATBUFFERS
-    CHECKSUM MD5=f7d19a3f021d93422b0bc287d7148cd2
-    URL ${FLATBUFFERS_URL}
-  )
-
-  set(FlatBuffersSource_DIR ${FLATBUFFERS_SOURCE_DIR} PARENT_SCOPE)
-  set(FlatBuffersSource_FOUND TRUE PARENT_SCOPE)
-endfunction(_FlatBuffersSource_import)
-
-_FlatBuffersSource_import()
diff --git a/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake b/infra/cmake/packages/FlatBuffersSourceConfigVersion.cmake
deleted file mode 100644 (file)
index ac9e22e..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-set(PACKAGE_VERSION_EXACT FALSE)
-set(PACKAGE_VERSION_COMPATIBLE FALSE)
-set(PACKAGE_VERSION_UNSUITABLE TRUE)
-
-if(NOT PACKAGE_FIND_VERSION)
-  # This package works only when find_package(...) call has no EXACT option
-  set(PACKAGE_VERSION_COMPATIBLE TRUE)
-  set(PACKAGE_VERSION_UNSUITABLE FALSE)
-endif(NOT PACKAGE_FIND_VERSION)
index 62a15e0ccbe2469111dc118fc14a797a5f71a544..872ff72765a77bc68252250bbfc3e439ad9e2bf1 100644 (file)
@@ -6,6 +6,7 @@ function(_GTest_build)
   nnas_find_package(GTestSource QUIET)
 
   if(NOT GTestSource_FOUND)
+    message(STATUS "GTest_build skip: NOT GTestSource_FOUND")
     return()
   endif(NOT GTestSource_FOUND)
 
@@ -13,9 +14,14 @@ function(_GTest_build)
   ExternalBuild_CMake(CMAKE_DIR   ${GTestSource_DIR}
                       BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/GTEST/build
                       INSTALL_DIR ${EXT_OVERLAY_DIR}
-                      IDENTIFIER  "1.8.0-fix1"
+                      IDENTIFIER  "1.11.0"
                       PKG_NAME    "GTEST")
 
+  set(GTEST_FOUND TRUE PARENT_SCOPE)
+  set(GTEST_INCLUDE_DIRS ${EXT_OVERLAY_DIR}/include PARENT_SCOPE)
+  set(GTEST_LIBRARIES ${EXT_OVERLAY_DIR}/lib/libgtest.a PARENT_SCOPE)
+  set(GTEST_MAIN_LIBRARIES ${EXT_OVERLAY_DIR}/lib/libgtest_main.a PARENT_SCOPE)
+
 endfunction(_GTest_build)
 
 _GTest_build()
@@ -24,7 +30,14 @@ _GTest_build()
 # Note: cmake supports GTest and does not find GTestConfig.cmake or GTest-config.cmake.
 # Refer to "https://cmake.org/cmake/help/v3.5/module/FindGTest.html"
 # find_package(GTest) creates options like GTEST_FOUND, not GTest_FOUND.
-find_package(GTest)
+if(GTEST_FOUND)
+  message(STATUS "Found GTest: true")
+else(GTEST_FOUND)
+  message(STATUS "GTEST_FOUND false: call find_package(GTest)")
+  # Reset package config directory cache to prevent recursive find
+  unset(GTest_DIR CACHE)
+  find_package(GTest)
+endif(GTEST_FOUND)
 find_package(Threads)
 
 if(${GTEST_FOUND} AND TARGET Threads::Threads)
index 8b7495fbc136097b96962b4e36919a25cf2c7598..e57d0965a5aabb0d65e58b428f0dc3733a0f62cc 100644 (file)
@@ -7,7 +7,7 @@ function(_GTestSource_import)
   nnas_include(ExternalSourceTools)
   nnas_include(OptionTools)
 
-  envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.8.0.tar.gz)
+  envoption(GTEST_URL https://github.com/google/googletest/archive/release-1.11.0.tar.gz)
 
   ExternalSource_Download(GTEST ${GTEST_URL})
 
diff --git a/infra/cmake/packages/H5Tinit.c.linux-armv7l b/infra/cmake/packages/H5Tinit.c.linux-armv7l
new file mode 100644 (file)
index 0000000..b0f6a47
--- /dev/null
@@ -0,0 +1,977 @@
+/* Generated automatically by H5detect -- do not edit */
+
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group.                                               *
+ * Copyright by the Board of Trustees of the University of Illinois.         *
+ * All rights reserved.                                                      *
+ *                                                                           *
+ * This file is part of HDF5.  The full HDF5 copyright notice, including     *
+ * terms governing use, modification, and redistribution, is contained in    *
+ * the files COPYING and Copyright.html.  COPYING can be found at the root   *
+ * of the source code distribution tree; Copyright.html can be found at the  *
+ * root level of an installed copy of the electronic HDF5 document set and   *
+ * is linked from the top-level documents page.  It can also be found at     *
+ * http://hdfgroup.org/HDF5/doc/Copyright.html.  If you do not have          *
+ * access to either file, you may request a copy from help@hdfgroup.org.     *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ *
+ * Created:            Mar 31, 2022
+ *                     Ubuntu <ubuntu@rpi4>
+ *
+ * Purpose:            This machine-generated source code contains
+ *                     information about the various integer and
+ *                     floating point numeric formats found on this
+ *                     architecture.  The parameters below should be
+ *                     checked carefully and errors reported to the
+ *                     HDF5 maintainer.
+ *                     
+ *                     Each of the numeric formats listed below are
+ *                     printed from most significant bit to least
+ *                     significant bit even though the actual bytes
+ *                     might be stored in a different order in
+ *                     memory.  The integers above each binary byte
+ *                     indicate the relative order of the bytes in
+ *                     memory; little-endian machines have
+ *                     decreasing numbers while big-endian machines
+ *                     have increasing numbers.
+ *                     
+ *                     The fields of the numbers are printed as
+ *                     letters with `S' for the mantissa sign bit,
+ *                     `M' for the mantissa magnitude, and `E' for
+ *                     the exponent.  The exponent has an associated
+ *                     bias which can be subtracted to find the
+ *                     true exponent.  The radix point is assumed
+ *                     to be before the first `M' bit.  Any bit
+ *                     of a floating-point value not falling into one
+ *                     of these categories is printed as a question
+ *                     mark.  Bits of integer types are printed as
+ *                     `I' for 2's complement and `U' for magnitude.
+ *                     
+ *                     If the most significant bit of the normalized
+ *                     mantissa (always a `1' except for `0.0') is
+ *                     not stored then an `implicit=yes' appears
+ *                     under the field description.  In thie case,
+ *                     the radix point is still assumed to be
+ *                     before the first `M' but after the implicit
+ *                     bit.
+ *
+ * Modifications:
+ *
+ *     DO NOT MAKE MODIFICATIONS TO THIS FILE!
+ *     It was generated by code in `H5detect.c'.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/****************/
+/* Module Setup */
+/****************/
+
+#define H5T_PACKAGE /*suppress error about including H5Tpkg.h*/
+
+
+/***********/
+/* Headers */
+/***********/
+#include "H5private.h"         /* Generic Functions                    */
+#include "H5Eprivate.h"                /* Error handling                       */
+#include "H5FLprivate.h"       /* Free Lists                           */
+#include "H5Iprivate.h"                /* IDs                                  */
+#include "H5Tpkg.h"            /* Datatypes                            */
+
+
+/****************/
+/* Local Macros */
+/****************/
+
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+
+/********************/
+/* Package Typedefs */
+/********************/
+
+
+/********************/
+/* Local Prototypes */
+/********************/
+
+
+/********************/
+/* Public Variables */
+/********************/
+
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+
+\f
+/*-------------------------------------------------------------------------
+ * Function:   H5TN_init_interface
+ *
+ * Purpose:    Initialize pre-defined native datatypes from code generated
+ *              during the library configuration by H5detect.
+ *
+ * Return:     Success:        non-negative
+ *             Failure:        negative
+ *
+ * Programmer: Robb Matzke
+ *              Wednesday, December 16, 1998
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5TN_init_interface(void)
+{
+    H5T_t      *dt = NULL;
+    herr_t     ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+   /*
+    *    0
+    * IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_SCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_SCHAR_ALIGN_g = 1;
+    H5T_NATIVE_SCHAR_COMP_ALIGN_g = 1;
+
+   /*
+    *    0
+    * UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UCHAR_ALIGN_g = 1;
+
+   /*
+    *    1        0
+    * IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 2;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 16;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_SHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_SHORT_ALIGN_g = 1;
+    H5T_NATIVE_SHORT_COMP_ALIGN_g = 2;
+
+   /*
+    *    1        0
+    * UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 2;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 16;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_USHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_USHORT_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_ALIGN_g = 1;
+    H5T_NATIVE_INT_COMP_ALIGN_g = 4;
+
+   /*
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_LONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_LONG_ALIGN_g = 1;
+    H5T_NATIVE_LONG_COMP_ALIGN_g = 4;
+
+   /*
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_ULONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_ULONG_ALIGN_g = 1;
+
+   /*
+    *    0
+    * IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT8_ALIGN_g = 1;
+
+   /*
+    *    0
+    * UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT8_ALIGN_g = 1;
+
+   /*
+    *    0
+    * IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_LEAST8_ALIGN_g = 1;
+
+   /*
+    *    0
+    * UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_LEAST8_ALIGN_g = 1;
+
+   /*
+    *    0
+    * IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_FAST8_ALIGN_g = 1;
+
+   /*
+    *    0
+    * UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 1;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 8;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_FAST8_ALIGN_g = 1;
+
+   /*
+    *    1        0
+    * IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 2;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 16;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT16_ALIGN_g = 1;
+
+   /*
+    *    1        0
+    * UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 2;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 16;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT16_ALIGN_g = 1;
+
+   /*
+    *    1        0
+    * IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 2;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 16;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_LEAST16_ALIGN_g = 1;
+
+   /*
+    *    1        0
+    * UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 2;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 16;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_LEAST16_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_FAST16_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_FAST16_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT32_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT32_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_LEAST32_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_LEAST32_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_FAST32_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_FAST32_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT64_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT64_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_LEAST64_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_LEAST64_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_INT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_INT_FAST64_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_UINT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_UINT_FAST64_ALIGN_g = 1;
+
+   /*
+    *    7        6        5        4
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    *    3        2        1        0
+    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
+    if((H5T_NATIVE_LLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_LLONG_ALIGN_g = 1;
+    H5T_NATIVE_LLONG_COMP_ALIGN_g = 8;
+
+   /*
+    *    7        6        5        4
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    *    3        2        1        0
+    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_INTEGER;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
+    if((H5T_NATIVE_ULLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_ULLONG_ALIGN_g = 1;
+
+   /*
+    *    3        2        1        0
+    * SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
+    * Implicit bit? yes
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_FLOAT;
+    dt->shared->size = 4;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 32;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.f.sign = 31;
+    dt->shared->u.atomic.u.f.epos = 23;
+    dt->shared->u.atomic.u.f.esize = 8;
+    dt->shared->u.atomic.u.f.ebias = 0x0000007f;
+    dt->shared->u.atomic.u.f.mpos = 0;
+    dt->shared->u.atomic.u.f.msize = 23;
+    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+    if((H5T_NATIVE_FLOAT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_FLOAT_ALIGN_g = 1;
+    H5T_NATIVE_FLOAT_COMP_ALIGN_g = 4;
+
+   /*
+    *    7        6        5        4
+    * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
+    *    3        2        1        0
+    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
+    * Implicit bit? yes
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_FLOAT;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.f.sign = 63;
+    dt->shared->u.atomic.u.f.epos = 52;
+    dt->shared->u.atomic.u.f.esize = 11;
+    dt->shared->u.atomic.u.f.ebias = 0x000003ff;
+    dt->shared->u.atomic.u.f.mpos = 0;
+    dt->shared->u.atomic.u.f.msize = 52;
+    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+    if((H5T_NATIVE_DOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_DOUBLE_ALIGN_g = 1;
+    H5T_NATIVE_DOUBLE_COMP_ALIGN_g = 8;
+
+   /*
+    *    7        6        5        4
+    * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
+    *    3        2        1        0
+    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
+    * Implicit bit? yes
+    * Alignment: none
+    */
+    if(NULL == (dt = H5T__alloc()))
+        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
+    dt->shared->state = H5T_STATE_IMMUTABLE;
+    dt->shared->type = H5T_FLOAT;
+    dt->shared->size = 8;
+    dt->shared->u.atomic.order = H5T_ORDER_LE;
+    dt->shared->u.atomic.offset = 0;
+    dt->shared->u.atomic.prec = 64;
+    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
+    dt->shared->u.atomic.u.f.sign = 63;
+    dt->shared->u.atomic.u.f.epos = 52;
+    dt->shared->u.atomic.u.f.esize = 11;
+    dt->shared->u.atomic.u.f.ebias = 0x000003ff;
+    dt->shared->u.atomic.u.f.mpos = 0;
+    dt->shared->u.atomic.u.f.msize = 52;
+    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
+    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
+    if((H5T_NATIVE_LDOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
+        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
+    H5T_NATIVE_LDOUBLE_ALIGN_g = 1;
+    H5T_NATIVE_LDOUBLE_COMP_ALIGN_g = 8;
+
+    /* Set the native order for this machine */
+    H5T_native_order_g = H5T_ORDER_LE;
+
+    /* Structure alignment for pointers, hvl_t, hobj_ref_t, hdset_reg_ref_t */
+    H5T_POINTER_COMP_ALIGN_g = 4;
+    H5T_HVL_COMP_ALIGN_g = 4;
+    H5T_HOBJREF_COMP_ALIGN_g = 8;
+    H5T_HDSETREGREF_COMP_ALIGN_g = 1;
+
+done:
+    if(ret_value < 0) {
+        if(dt != NULL) {
+            dt->shared = H5FL_FREE(H5T_shared_t, dt->shared);
+            dt = H5FL_FREE(H5T_t, dt);
+        } /* end if */
+    } /* end if */
+
+    FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5TN_init_interface() */
+
+/****************************************/
+/* ALIGNMENT and signal-handling status */
+/****************************************/
+/* Signal() support: yes */
+/* setjmp() support: yes */
+/* longjmp() support: yes */
+/* sigsetjmp() support: yes */
+/* siglongjmp() support: yes */
+/* sigprocmask() support: yes */
+
+/******************************/
+/* signal handlers statistics */
+/******************************/
+/* signal_handlers tested: 15 times */
+/* sigbus_handler called: 5 times */
+/* sigsegv_handler called: 5 times */
+/* sigill_handler called: 5 times */
index 19803f1eafa7af8af618d41104c8bd9fb7a977f5..4ab338144239a2280a0df38b8e2a72885eff3a23 100644 (file)
@@ -6,9 +6,24 @@ function(_HDF5_build)
   nnas_find_package(HDF5Source QUIET)
 
   if(NOT HDF5Source_FOUND)
+    message(STATUS "HD5Config skip: HDF5Source NOT FOUND")
     return()
   endif(NOT HDF5Source_FOUND)
 
+  if(DEFINED ENV{BUILD_HOST_EXEC})
+    set(EXTERNAL_H5MAKE_LIBSETTINGS $ENV{BUILD_HOST_EXEC}/externals/HDF5/build/bin/H5make_libsettings)
+    set(ENV{EXTERNAL_H5MAKE_LIBSETTINGS} ${EXTERNAL_H5MAKE_LIBSETTINGS})
+
+    # NOTE https://github.com/Samsung/ONE/issues/8762
+    # TODO generalize to select 'linux-armv7l'
+    set(H5TINIT_C_FROM_NATIVE ${CMAKE_CURRENT_LIST_DIR}/H5Tinit.c.linux-armv7l)
+    set(H5TINIT_C_COPY ${CMAKE_BINARY_DIR}/externals/HDF5/build/H5Tinit.c)
+    message(STATUS "Copy H5Tinit.c generated from target native build")
+    execute_process(
+      COMMAND ${CMAKE_COMMAND} -E copy "${H5TINIT_C_FROM_NATIVE}" "${H5TINIT_C_COPY}"
+    )
+  endif(DEFINED ENV{BUILD_HOST_EXEC})
+
   nnas_include(ExternalBuildTools)
   ExternalBuild_CMake(CMAKE_DIR   ${HDF5Source_DIR}
                       BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/HDF5/build
@@ -26,6 +41,7 @@ _HDF5_build()
 
 find_path(HDF5_CONFIG_DIR "hdf5-config.cmake"
           PATHS ${EXT_OVERLAY_DIR}
+          NO_CMAKE_FIND_ROOT_PATH
           PATH_SUFFIXES
             cmake
             share/cmake
diff --git a/infra/cmake/packages/HDF5Source.patch b/infra/cmake/packages/HDF5Source.patch
new file mode 100644 (file)
index 0000000..b8602a0
--- /dev/null
@@ -0,0 +1,195 @@
+Only in HDF5: build
+diff -r -u a/config/cmake/ConfigureChecks.cmake b/config/cmake/ConfigureChecks.cmake
+--- a/config/cmake/ConfigureChecks.cmake
++++ b/config/cmake/ConfigureChecks.cmake
+@@ -109,15 +109,15 @@
+ if (NOT WINDOWS)
+   CHECK_FUNCTION_EXISTS(clock_gettime CLOCK_GETTIME_IN_LIBC)
+   CHECK_LIBRARY_EXISTS(rt clock_gettime "" CLOCK_GETTIME_IN_LIBRT)
+-  CHECK_LIBRARY_EXISTS(posix4 clock_gettime "" CLOCK_GETTIME_IN_LIBPOSIX4)
++  #CHECK_LIBRARY_EXISTS(posix4 clock_gettime "" CLOCK_GETTIME_IN_LIBPOSIX4)
+   if (CLOCK_GETTIME_IN_LIBC)
+     set (H5_HAVE_CLOCK_GETTIME 1)
+   elseif (CLOCK_GETTIME_IN_LIBRT)
+     set (H5_HAVE_CLOCK_GETTIME 1)
+     list (APPEND LINK_LIBS rt)
+-  elseif (CLOCK_GETTIME_IN_LIBPOSIX4)
+-    set (H5_HAVE_CLOCK_GETTIME 1)
+-    list (APPEND LINK_LIBS posix4)
++  #elseif (CLOCK_GETTIME_IN_LIBPOSIX4)
++  #  set (H5_HAVE_CLOCK_GETTIME 1)
++  #  list (APPEND LINK_LIBS posix4)
+   endif (CLOCK_GETTIME_IN_LIBC)
+ endif (NOT WINDOWS)
+ #-----------------------------------------------------------------------------
+@@ -130,12 +130,17 @@
+   if (HDF5_ENABLE_DIRECT_VFD)
+     set (msg "Performing TEST_DIRECT_VFD_WORKS")
+     set (MACRO_CHECK_FUNCTION_DEFINITIONS "-DTEST_DIRECT_VFD_WORKS -D_GNU_SOURCE ${CMAKE_REQUIRED_FLAGS}")
++    if(NOT CMAKE_CROSSCOMPILING)
+     TRY_RUN (TEST_DIRECT_VFD_WORKS_RUN   TEST_DIRECT_VFD_WORKS_COMPILE
+         ${CMAKE_BINARY_DIR}
+         ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+         CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
+         OUTPUT_VARIABLE OUTPUT
+     )
++    else(NOT CMAKE_CROSSCOMPILING)
++      set(TEST_DIRECT_VFD_WORKS_RUN 0)
++      set(TEST_DIRECT_VFD_WORKS_COMPILE TRUE)
++    endif(NOT CMAKE_CROSSCOMPILING)
+     if (TEST_DIRECT_VFD_WORKS_COMPILE)
+       if (TEST_DIRECT_VFD_WORKS_RUN  MATCHES 0)
+         HDF_FUNCTION_TEST (HAVE_DIRECT)
+@@ -221,7 +226,12 @@
+ # The machine's conversion gets the correct value.  We define the macro and disable
+ # this kind of test until we figure out what algorithm they use.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LDOUBLE_TO_LONG_SPECIAL  "Checking IF your system converts long double to (unsigned) long values with special algorithm")
++else(NOT CMAKE_CROSSCOMPILING)
++  set(H5_LDOUBLE_TO_LONG_SPECIAL_RUN 1)
++  set(H5_LDOUBLE_TO_LONG_SPECIAL_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine is using a special algorithm
+ # to convert some values of '(unsigned) long' to 'long double' values.  
+@@ -230,7 +240,12 @@
+ # ..., 7fffff..., the compiler uses a unknown algorithm.  We define a 
+ # macro and skip the test for now until we know about the algorithm.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LONG_TO_LDOUBLE_SPECIAL "Checking IF your system can convert (unsigned) long to long double values with special algorithm")
++else(NOT CMAKE_CROSSCOMPILING)
++  set(H5_LONG_TO_LDOUBLE_SPECIAL_RUN 1)
++  set(H5_LONG_TO_LDOUBLE_SPECIAL_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine can accurately convert
+ # 'long double' to '(unsigned) long long' values.  (This flag should be set for
+@@ -240,7 +255,12 @@
+ # 0x4351ccf385ebc8a0dfcc... or 0x4351ccf385ebc8a0ffcc... will make the converted
+ # values wildly wrong.  This test detects this wrong behavior and disable the test.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LDOUBLE_TO_LLONG_ACCURATE "Checking IF correctly converting long double to (unsigned) long long values")
++else(NOT CMAKE_CROSSCOMPILING)
++  set(H5_LDOUBLE_TO_LLONG_ACCURATE_RUN 0)
++  set(H5_LDOUBLE_TO_LLONG_ACCURATE_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Set the flag to indicate that the machine can accurately convert
+ # '(unsigned) long long' to 'long double' values.  (This flag should be set for
+@@ -248,11 +268,21 @@
+ # 007fff..., 00ffff..., 01ffff..., ..., 7fffff..., the converted values are twice
+ # as big as they should be.
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_LLONG_TO_LDOUBLE_CORRECT "Checking IF correctly converting (unsigned) long long to long double values")
++else(NOT CMAKE_CROSSCOMPILING)
++  set(H5_LLONG_TO_LDOUBLE_CORRECT_RUN 0)
++  set(H5_LLONG_TO_LDOUBLE_CORRECT_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # ----------------------------------------------------------------------
+ # Check if pointer alignments are enforced
+ #
++if(NOT CMAKE_CROSSCOMPILING)
+ H5ConversionTests (H5_NO_ALIGNMENT_RESTRICTIONS "Checking IF alignment restrictions are strictly enforced")
++else(NOT CMAKE_CROSSCOMPILING)
++  set(H5_NO_ALIGNMENT_RESTRICTIONS_RUN 0)
++  set(H5_NO_ALIGNMENT_RESTRICTIONS_COMPILE TRUE)
++endif(NOT CMAKE_CROSSCOMPILING)
+ # -----------------------------------------------------------------------
+ # wrapper script variables
+diff -r -u a/config/cmake_ext_mod/ConfigureChecks.cmake b/config/cmake_ext_mod/ConfigureChecks.cmake
+--- a/config/cmake_ext_mod/ConfigureChecks.cmake
++++ b/config/cmake_ext_mod/ConfigureChecks.cmake
+@@ -272,12 +272,17 @@
+   # http://www.gnu.org/s/libc/manual/html_node/Feature-Test-Macros.html
+   set (HDF_EXTRA_C_FLAGS -D_POSIX_C_SOURCE=199506L)
+   # _BSD_SOURCE deprecated in GLIBC >= 2.20
++  if(NOT CMAKE_CROSSCOMPILING)
+   TRY_RUN (HAVE_DEFAULT_SOURCE_RUN HAVE_DEFAULT_SOURCE_COMPILE
+         ${CMAKE_BINARY_DIR}
+         ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+         CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=-DHAVE_DEFAULT_SOURCE
+         OUTPUT_VARIABLE OUTPUT
+     )
++  else(NOT CMAKE_CROSSCOMPILING)
++    set(HAVE_DEFAULT_SOURCE_RUN 1)
++    set(HAVE_DEFAULT_SOURCE_COMPILE TRUE)
++  endif(NOT CMAKE_CROSSCOMPILING)
+   if (HAVE_DEFAULT_SOURCE_COMPILE AND HAVE_DEFAULT_SOURCE_RUN)
+     set (HDF_EXTRA_FLAGS -D_DEFAULT_SOURCE)
+   else (HAVE_DEFAULT_SOURCE_COMPILE AND HAVE_DEFAULT_SOURCE_RUN)
+@@ -287,12 +292,17 @@
+   option (HDF_ENABLE_LARGE_FILE "Enable support for large (64-bit) files on Linux." ON)
+   if (HDF_ENABLE_LARGE_FILE)
+     set (msg "Performing TEST_LFS_WORKS")
++    if(NOT CMAKE_CROSSCOMPILING)
+     TRY_RUN (TEST_LFS_WORKS_RUN   TEST_LFS_WORKS_COMPILE
+         ${CMAKE_BINARY_DIR}
+         ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+         CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=-DTEST_LFS_WORKS
+         OUTPUT_VARIABLE OUTPUT
+     )
++    else(NOT CMAKE_CROSSCOMPILING)
++      set(TEST_LFS_WORKS_RUN 0)
++      set(TEST_LFS_WORKS_COMPILE TRUE)
++    endif(NOT CMAKE_CROSSCOMPILING)
+     if (TEST_LFS_WORKS_COMPILE)
+       if (TEST_LFS_WORKS_RUN  MATCHES 0)
+         set (TEST_LFS_WORKS 1 CACHE INTERNAL ${msg})
+@@ -702,7 +712,8 @@
+   set (CURRENT_TEST_DEFINITIONS "-DPRINTF_LL_WIDTH")
+   if (${HDF_PREFIX}_SIZEOF_LONG_LONG)
+     set (CURRENT_TEST_DEFINITIONS "${CURRENT_TEST_DEFINITIONS} -DHAVE_LONG_LONG")
+   endif (${HDF_PREFIX}_SIZEOF_LONG_LONG)
++  if(NOT CMAKE_CROSSCOMPILING)
+   TRY_RUN (${HDF_PREFIX}_PRINTF_LL_TEST_RUN   ${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE
+       ${CMAKE_BINARY_DIR}
+       ${HDF_RESOURCES_EXT_DIR}/HDFTests.c
+@@ -722,6 +733,13 @@
+         "Test ${HDF_PREFIX}_PRINTF_LL_WIDTH failed with the following output:\n ${OUTPUT}\n"
+     )
+   endif (${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE)
++  else(NOT CMAKE_CROSSCOMPILING)
++    set (${HDF_PREFIX}_PRINTF_LL_TEST_RUN 1)
++    set (${HDF_PREFIX}_PRINTF_LL_TEST_COMPILE 1)
++    set (${HDF_PREFIX}_PRINTF_LL_WIDTH "\"L\"")
++    set (${HDF_PREFIX}_PRINTF_LL "L")
++    set (PRINT_LL_FOUND 1)
++  endif(NOT CMAKE_CROSSCOMPILING)
+   if (PRINT_LL_FOUND)
+     message (STATUS "Checking for appropriate format for 64 bit long: found ${${HDF_PREFIX}_PRINTF_LL_WIDTH}")
+diff -r -u a/src/CMakeLists.txt b/src/CMakeLists.txt
+--- a/src/CMakeLists.txt
++++ b/src/CMakeLists.txt
+@@ -616,6 +616,7 @@
+   target_link_libraries (H5detect "ws2_32.lib")
+ endif (MSVC OR MINGW)
++if (NOT CMAKE_CROSSCOMPILING)
+ set (CMD $<TARGET_FILE:H5detect>)
+ add_custom_command (
+     OUTPUT ${HDF5_BINARY_DIR}/H5Tinit.c
+@@ -623,6 +624,7 @@
+     ARGS > ${HDF5_BINARY_DIR}/H5Tinit.c
+     DEPENDS H5detect
+ )
++endif (NOT CMAKE_CROSSCOMPILING)
+ add_executable (H5make_libsettings ${HDF5_SRC_DIR}/H5make_libsettings.c)
+ TARGET_C_PROPERTIES (H5make_libsettings STATIC " " " ")
+@@ -631,6 +633,10 @@
+ endif (MSVC OR MINGW)
+ set (CMD $<TARGET_FILE:H5make_libsettings>)
++# for cross compile
++if (DEFINED ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
++  set(CMD $ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
++endif (DEFINED ENV{EXTERNAL_H5MAKE_LIBSETTINGS})
+ add_custom_command (
+     OUTPUT ${HDF5_BINARY_DIR}/H5lib_settings.c
+     COMMAND ${CMD}
index 134efa6f445f4a07e2d08fc9351aa2b65a8290d5..9db048c86768418d3b8638bac3214868ee1247db 100644 (file)
@@ -9,7 +9,8 @@ function(_HDF5Source_import)
 
   envoption(HDF5_URL https://github.com/HDFGroup/hdf5/archive/hdf5-1_8_16.tar.gz)
 
-  ExternalSource_Download(HDF5 ${HDF5_URL})
+  ExternalSource_Download(HDF5 ${HDF5_URL}
+                          PATCH ${CMAKE_CURRENT_LIST_DIR}/HDF5Source.patch)
 
   set(HDF5Source_DIR ${HDF5_SOURCE_DIR} PARENT_SCOPE)
   set(HDF5Source_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/JsoncppConfig.cmake b/infra/cmake/packages/JsoncppConfig.cmake
new file mode 100644 (file)
index 0000000..3c5c3e7
--- /dev/null
@@ -0,0 +1,34 @@
+function(_Jsoncpp_import)
+  nnas_find_package(JsoncppSource QUIET)
+
+  if(NOT JsoncppSource_FOUND)
+    set(Jsoncpp_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT JsoncppSource_FOUND)
+
+  nnas_include(ExternalBuildTools)
+  ExternalBuild_CMake(CMAKE_DIR   ${JsoncppSource_DIR}
+                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/JSONCPP/build
+                      INSTALL_DIR ${EXT_OVERLAY_DIR}
+                      IDENTIFIER  "1.9.5"
+                      PKG_NAME    "JSONCPP"
+                      EXTRA_OPTS "-DBUILD_STATIC_LIBS=ON"
+                                 "-DBUILD_SHARED_LIBS=OFF"
+                                 "-DJSONCPP_WITH_TESTS=OFF"
+                                 "-DJSONCPP_WITH_POST_BUILD_UNITTEST=OFF")
+
+  find_path(Jsoncpp_INCLUDE_DIRS
+            NAMES json.h
+            PATHS ${EXT_OVERLAY_DIR}
+            NO_CMAKE_FIND_ROOT_PATH
+            PATH_SUFFIXES include/json)
+  find_file(Jsoncpp_STATIC_LIB
+            NAMES libjsoncpp.a
+            PATHS ${EXT_OVERLAY_DIR}
+            NO_CMAKE_FIND_ROOT_PATH
+            PATH_SUFFIXES lib)
+
+  set(Jsoncpp_FOUND TRUE PARENT_SCOPE)
+endfunction(_Jsoncpp_import)
+
+_Jsoncpp_import()
diff --git a/infra/cmake/packages/JsoncppSourceConfig.cmake b/infra/cmake/packages/JsoncppSourceConfig.cmake
new file mode 100644 (file)
index 0000000..3195ea4
--- /dev/null
@@ -0,0 +1,18 @@
+function(_JsoncppSource_import)
+  if(NOT DOWNLOAD_JSONCPP)
+    set(JsoncppSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_JSONCPP)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  envoption(JSONCPP_URL https://github.com/open-source-parsers/jsoncpp/archive/refs/tags/1.9.5.tar.gz)
+
+  ExternalSource_Download(JSONCPP ${JSONCPP_URL})
+
+  set(JsoncppSource_DIR ${JSONCPP_SOURCE_DIR} PARENT_SCOPE)
+  set(JsoncppSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_JsoncppSource_import)
+
+_JsoncppSource_import()
index 8d65fe76e60653b7a406b54a2062e59ec35ad743..ec7c65a739844268ee7fe2943b66d88b9ae47e65 100644 (file)
@@ -2,11 +2,11 @@ function(_Opencl_Headers_import)
   nnas_find_package(Opencl_HeadersSource QUIET)
 
   # NOTE This line prevents multiple definitions of target
-  if(TARGET Headers)
+  if(TARGET OpenCL_Headers)
     set(Opencl_HeadersSource_DIR ${Opencl_HeadersSource_DIR} PARENT_SCOPE)
     set(Opencl_Headers_FOUND TRUE PARENT_SCOPE)
     return()
-  endif(TARGET Headers)
+  endif(TARGET OpenCL_Headers)
 
   if(NOT Opencl_HeadersSource_FOUND)
     message(STATUS "Opencl_Headers: Source not found")
@@ -14,7 +14,12 @@ function(_Opencl_Headers_import)
     return()
   endif(NOT Opencl_HeadersSource_FOUND)
 
-  add_extdirectory("${Opencl_HeadersSource_DIR}" OPENCL_HEADERS EXCLUDE_FROM_ALL)
+  # We don't need test builds and installs, we only need headers.
+  # add_extdirectory("${Opencl_HeadersSource_DIR}" OPENCL_HEADERS EXCLUDE_FROM_ALL)
+
+  add_library(OpenCL_Headers INTERFACE)
+  target_include_directories(OpenCL_Headers INTERFACE ${Opencl_HeadersSource_DIR})
+
   set(Opencl_Headers_DIR ${Opencl_HeadersSource_DIR} PARENT_SCOPE)
   set(Opencl_Headers_FOUND TRUE PARENT_SCOPE)
 endfunction(_Opencl_Headers_import)
index 3c8d2320f1638a76642126ead602e68039470129..f8e9ff1f951e43f3249dd5c3aa665fbd943cd4b1 100644 (file)
@@ -51,17 +51,34 @@ function(_Protobuf_build)
     return()
   endif(NOT ProtobufSource_FOUND)
 
+  # set 'EXTERNAL_JS_EMBED' environment variable
+  if(NOT DEFINED ENV{EXTERNAL_JS_EMBED})
+    if(DEFINED ENV{BUILD_HOST_EXEC})
+      set(EXTERNAL_JS_EMBED $ENV{BUILD_HOST_EXEC}/externals/PROTOBUF/build/js_embed)
+      set(ENV{EXTERNAL_JS_EMBED} ${EXTERNAL_JS_EMBED})
+    endif(DEFINED ENV{BUILD_HOST_EXEC})
+  endif(NOT DEFINED ENV{EXTERNAL_JS_EMBED})
+
   nnas_include(ExternalBuildTools)
   ExternalBuild_CMake(CMAKE_DIR   ${ProtobufSource_DIR}/cmake
                       BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/PROTOBUF/build
                       INSTALL_DIR ${EXT_OVERLAY_DIR}
                       BUILD_FLAGS -fPIC
                       EXTRA_OPTS  -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_WITH_ZLIB=OFF
-                      IDENTIFIER  "3.5.2-fix1"
+                      IDENTIFIER  "3.5.2-fix2"
                       PKG_NAME    "PROTOBUF")
 
 endfunction(_Protobuf_build)
 
+set(PROTOC_PATH $<TARGET_FILE:protobuf::protoc>)
+
+if(DEFINED ENV{BUILD_HOST_EXEC})
+  set(PROTOC_PATH $ENV{BUILD_HOST_EXEC}/overlay/bin/protoc)
+endif(DEFINED ENV{BUILD_HOST_EXEC})
+if(DEFINED ENV{EXTERNAL_PROTOC})
+  set(PROTOC_PATH $ENV{EXTERNAL_PROTOC})
+endif(DEFINED ENV{EXTERNAL_PROTOC})
+
 _Protobuf_build()
 
 if(USE_PROTOBUF_LEGACY_IMPORT)
@@ -96,7 +113,7 @@ if(Protobuf_FOUND)
 
     add_custom_command(OUTPUT ${OUTPUT_FILES}
                        COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
-                       COMMAND "$<TARGET_FILE:protobuf::protoc>" --cpp_out "${abs_output_dir}" -I "${abs_proto_dir}" ${PROTO_FILES}
+                       COMMAND "${PROTOC_PATH}" --cpp_out "${abs_output_dir}" -I "${abs_proto_dir}" ${PROTO_FILES}
                        DEPENDS ${PROTO_FILES})
 
     set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
diff --git a/infra/cmake/packages/ProtobufSource.patch b/infra/cmake/packages/ProtobufSource.patch
new file mode 100644 (file)
index 0000000..9a83a80
--- /dev/null
@@ -0,0 +1,18 @@
+--- a/cmake/libprotoc.cmake
++++ b/cmake/libprotoc.cmake
+@@ -209,10 +209,14 @@
+   ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types/timestamp.js
+ )
+ add_executable(js_embed ${protobuf_source_dir}/src/google/protobuf/compiler/js/embed.cc)
++set(JS_EMBED_EXEC "js_embed")
++if(DEFINED ENV{EXTERNAL_JS_EMBED})
++  set(JS_EMBED_EXEC "$ENV{EXTERNAL_JS_EMBED}")
++endif()
+ add_custom_command(
+   OUTPUT ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
+   DEPENDS js_embed ${js_well_known_types_sources}
+-  COMMAND js_embed ${js_well_known_types_sources} > ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
++  COMMAND ${JS_EMBED_EXEC} ${js_well_known_types_sources} > ${protobuf_source_dir}/src/google/protobuf/compiler/js/well_known_types_embed.cc
+ )
+
+ add_library(libprotoc ${protobuf_SHARED_OR_STATIC}
index 6b35ae7dc44e78d2fbdee0904bb8151449c71109..baa49eeb0bdb042a906fd8b74d0c3edf1942d07f 100644 (file)
@@ -9,7 +9,8 @@ function(_ProtobufSource_import)
 
   envoption(PROTOBUF_URL https://github.com/protocolbuffers/protobuf/archive/v3.5.2.tar.gz)
 
-  ExternalSource_Download(PROTOBUF ${PROTOBUF_URL})
+  ExternalSource_Download(PROTOBUF ${PROTOBUF_URL}
+                          PATCH ${CMAKE_CURRENT_LIST_DIR}/ProtobufSource.patch)
 
   set(ProtobufSource_DIR ${PROTOBUF_SOURCE_DIR} PARENT_SCOPE)
   set(ProtobufSource_FOUND TRUE PARENT_SCOPE)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644 (file)
index 0000000..f3663cc
--- /dev/null
@@ -0,0 +1,20 @@
+function(_TensorFlowGEMMLowpSource_import)
+  if(NOT DOWNLOAD_GEMMLOWP)
+    set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_GEMMLOWP)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  # Exact version used by TensorFlow v2.8.0.
+  # See tensorflow/third_party/gemmlowp/workspace.bzl.
+  envoption(TENSORFLOW_2_8_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+  ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.8.0-GEMMLOWP ${TENSORFLOW_2_8_0_GEMMLOWP_URL})
+
+  set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.8.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..2ad2e24
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGpuConfig.cmake b/infra/cmake/packages/TensorFlowGpuConfig.cmake
new file mode 100644 (file)
index 0000000..7a7f786
--- /dev/null
@@ -0,0 +1,22 @@
+# TensorFlowGpuConfig.cmake
+
+function(_Build_TfliteGpuDelagate_)
+  nnas_find_package(TensorFlowGpuSource REQUIRED)
+  nnas_find_package(AbseilSource REQUIRED)
+  nnas_find_package(Farmhash REQUIRED)
+  nnas_find_package(Fp16Source REQUIRED)
+
+  if(NOT TARGET TensorFlowGpu)
+    nnas_include(ExternalProjectTools)
+    add_extdirectory("${CMAKE_CURRENT_LIST_DIR}/TensorFlowLiteGpu" TensorFlowLiteGpu)
+  endif()
+  set(TENSORFLOWGPU_SOURCE_DIR ${TENSORFLOWGPU_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowGpu_DIR ${TensorFlowGpu_DIR} PARENT_SCOPE)
+endfunction(_Build_TfliteGpuDelagate_)
+
+if(BUILD_TENSORFLOW_LITE_GPU)
+  _Build_TfliteGpuDelagate_()
+  set(TensorFlowGpu_FOUND TRUE PARENT_SCOPE)
+else(BUILD_TENSORFLOW_LITE_GPU)
+  set(TensorFlowGpu_FOUND FALSE PARENT_SCOPE)
+endif(BUILD_TENSORFLOW_LITE_GPU)
diff --git a/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch b/infra/cmake/packages/TensorFlowGpuSource/patch_for_gpu_cl_build.patch
new file mode 100644 (file)
index 0000000..bf423dc
--- /dev/null
@@ -0,0 +1,292 @@
+diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h
+index 7892d0ce..fae4fb69 100644
+--- a/tensorflow/lite/delegates/gpu/api.h
++++ b/tensorflow/lite/delegates/gpu/api.h
+@@ -43,11 +43,18 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+ #include "tensorflow/lite/delegates/gpu/common/util.h"
++
++#ifdef TFLITE_GPU_LIB_FIX
+ #include <vulkan/vulkan.h>
++#endif
+ #define GL_NO_PROTOTYPES
+ #define EGL_NO_PROTOTYPES
++
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
++#endif
++
+ #undef GL_NO_PROTOTYPES
+ #undef EGL_NO_PROTOTYPES
+@@ -80,6 +87,7 @@ enum class ObjectType {
+   VULKAN_TEXTURE
+ };
++#ifdef TFLITE_GPU_LIB_FIX
+ struct OpenGlBuffer {
+   OpenGlBuffer() = default;
+   explicit OpenGlBuffer(GLuint new_id) : id(new_id) {}
+@@ -95,6 +103,7 @@ struct OpenGlTexture {
+   GLuint id = GL_INVALID_INDEX;
+   GLenum format = GL_INVALID_ENUM;
+ };
++#endif
+ struct OpenClBuffer {
+   OpenClBuffer() = default;
+@@ -111,6 +120,7 @@ struct OpenClTexture {
+   // TODO(akulik): should it specify texture format?
+ };
++#ifdef TFLITE_GPU_LIB_FIX
+ struct VulkanBuffer {
+   VulkanBuffer() = default;
+   explicit VulkanBuffer(VkBuffer buffer_, VkDeviceSize size_,
+@@ -143,6 +153,7 @@ struct VulkanMemory {
+   VkDeviceSize size;
+   VkDeviceSize offset;
+ };
++#endif
+ struct CpuMemory {
+   CpuMemory() = default;
+@@ -228,10 +239,15 @@ bool IsValid(const TensorObjectDef& def);
+ // @return the number of elements in a tensor object.
+ uint32_t NumElements(const TensorObjectDef& def);
++#ifdef TFLITE_GPU_LIB_FIX
+ using TensorObject =
+     absl::variant<absl::monostate, OpenGlBuffer, OpenGlTexture, CpuMemory,
+                   OpenClBuffer, OpenClTexture, VulkanBuffer, VulkanTexture>;
+-
++#else
++using TensorObject =
++    absl::variant<absl::monostate, CpuMemory,
++                  OpenClBuffer, OpenClTexture>;
++#endif
+ // @return true if object is set and corresponding values are defined.
+ bool IsValid(const TensorObjectDef& def, const TensorObject& object);
+diff --git a/tensorflow/lite/delegates/gpu/cl/api.h b/tensorflow/lite/delegates/gpu/cl/api.h
+index 65671117..c339f3f0 100644
+--- a/tensorflow/lite/delegates/gpu/cl/api.h
++++ b/tensorflow/lite/delegates/gpu/cl/api.h
+@@ -20,7 +20,9 @@ limitations under the License.
+ #define EGL_NO_PROTOTYPES
+ #endif
++#ifdef TFLITE_GPU_LIB_FIX
+ #include <EGL/egl.h>
++#endif
+ #include <cstdint>
+ #include <memory>
+@@ -115,9 +117,10 @@ struct InferenceEnvironmentOptions {
+   // It is the error to set egl_display, egl_context AND context at the same
+   // time. If egl_display and egl_context are set, they will be used to create
+   // GL-aware CL context.
++#ifdef TFLITE_GPU_LIB_FIX
+   EGLDisplay egl_display = EGL_NO_DISPLAY;
+   EGLContext egl_context = EGL_NO_CONTEXT;
+-
++#endif //TFLITE_GPU_LIB_FIX
+   // Should contain data returned from
+   // InferenceEnvironment::GetSerializedBinaryCache method.
+   // Invalid or incompatible data will be discarded. Compiled binary may become
+@@ -125,7 +128,11 @@ struct InferenceEnvironmentOptions {
+   absl::Span<const uint8_t> serialized_binary_cache;
+   bool IsGlAware() const {
++#ifdef TFLITE_GPU_LIB_FIX
+     return egl_context != EGL_NO_CONTEXT && egl_display != EGL_NO_DISPLAY;
++#else //TFLITE_GPU_LIB_FIX
++    return false;
++#endif //TFLITE_GPU_LIB_FIX
+   }
+ };
+diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h
+index a5435c4f..e088355b 100644
+--- a/tensorflow/lite/delegates/gpu/cl/arguments.h
++++ b/tensorflow/lite/delegates/gpu/cl/arguments.h
+@@ -23,7 +23,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
+ #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
+ #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/util.h"
+ #include "tensorflow/lite/delegates/gpu/common/access_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+@@ -78,11 +80,12 @@ class Arguments : public ArgumentsBinder {
+   ~Arguments() override = default;
+  private:
++#ifdef TFLITE_GPU_LIB_FIX
+   friend flatbuffers::Offset<data::Arguments> Encode(
+       const Arguments& args, flatbuffers::FlatBufferBuilder* builder);
+   friend absl::Status Decode(CLContext* context, const data::Arguments* fb_args,
+                              Arguments* args);
+-
++#endif
+   void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
+   void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
+   void AddImage2DArray(const std::string& name,
+diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_object.h b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
+index abd77a44..ac1b7f00 100644
+--- a/tensorflow/lite/delegates/gpu/cl/gpu_object.h
++++ b/tensorflow/lite/delegates/gpu/cl/gpu_object.h
+@@ -23,7 +23,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+ #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/common/access_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+@@ -165,10 +167,12 @@ class GPUObjectDescriptor {
+   AccessType GetAccess() const { return access_type_; }
+  protected:
++#ifdef TFLITE_GPU_LIB_FIX
+   friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
+       const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
+   friend void Decode(const data::GPUObjectDescriptor* fb_obj,
+                      GPUObjectDescriptor* obj);
++#endif
+   mutable std::map<std::string, std::string> state_vars_;
+   AccessType access_type_;
+ };
+diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+index ca0c0319..f3cbc863 100644
+--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
++++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+@@ -151,6 +151,7 @@ CLNode& CLNode::operator=(CLNode&& node) {
+   return *this;
+ }
++#ifdef TFLITE_GPU_LIB_FIX
+ absl::Status InferenceContext::InitFromGraph(
+     const CreateInferenceInfo& create_info, const GraphFloat32& graph,
+     Environment* env, std::vector<uint8_t>* serialized_model) {
+@@ -239,6 +240,7 @@ absl::Status InferenceContext::RestoreDeserialized(
+   }
+   return absl::OkStatus();
+ }
++#endif
+ absl::Status InferenceContext::InitFromGraphWithTransforms(
+     const CreateInferenceInfo& create_info, GraphFloat32* graph,
+diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h
+index ec8055eb..871af9dd 100644
+--- a/tensorflow/lite/delegates/gpu/cl/inference_context.h
++++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h
+@@ -31,7 +31,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/model_hints.h"
+ #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+ #include "tensorflow/lite/delegates/gpu/cl/precision.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/model.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+@@ -100,12 +102,14 @@ class InferenceContext {
+  private:
+   enum TensorMemoryType { STRONG_SHAPE = 0, BUFFER = 1, VARIABLE = 2 };
++#ifdef TFLITE_GPU_LIB_FIX
+   friend flatbuffers::Offset<data::InferenceContext> Encode(
+       const InferenceContext& inference,
+       flatbuffers::FlatBufferBuilder* builder);
+   friend absl::Status Decode(CLContext* context,
+                              const data::InferenceContext* fb_inference,
+                              InferenceContext* inference);
++#endif
+   void CopyInAndOutIds(const GraphFloat32& graph);
+   absl::Status ConvertOperations(const DeviceInfo& device_info,
+diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+index 57d8690c..8178e2de 100644
+--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
++++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+@@ -30,7 +30,9 @@ limitations under the License.
+ #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h"
+ #include "tensorflow/lite/delegates/gpu/cl/precision.h"
+ #include "tensorflow/lite/delegates/gpu/cl/program_cache.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/serialization_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+ #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+ #include "tensorflow/lite/delegates/gpu/common/data_type.h"
+@@ -169,11 +171,12 @@ class GPUOperation {
+   bool check_src_channels_size_ = false;
+  protected:
++#ifdef TFLITE_GPU_LIB_FIX
+   friend flatbuffers::Offset<data::GPUOperation> Encode(
+       const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
+   friend absl::Status Decode(CLContext* context,
+                              const data::GPUOperation* fb_op, GPUOperation* op);
+-
++#endif
+   virtual absl::Status BindArguments(ArgumentsBinder* args) {
+     return absl::OkStatus();
+   }
+diff --git a/tensorflow/lite/delegates/gpu/cl/program_cache.cc b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
+index 285aa06d..f636a909 100644
+--- a/tensorflow/lite/delegates/gpu/cl/program_cache.cc
++++ b/tensorflow/lite/delegates/gpu/cl/program_cache.cc
+@@ -18,9 +18,13 @@ limitations under the License.
+ #include <cstdint>
+ #include <string>
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/cl_program.h"
++#ifdef TFLITE_GPU_LIB_FIX
+ #include "tensorflow/lite/delegates/gpu/cl/compiled_program_cache_generated.h"
++#endif
+ #include "tensorflow/lite/delegates/gpu/cl/util.h"
+ #include "tensorflow/lite/delegates/gpu/common/status.h"
+ #include <farmhash.h>
+@@ -82,6 +86,7 @@ absl::Status ProgramCache::GetOrCreateCLKernel(const std::string& code,
+   return GetOrCreateCLKernel(code, function_name, {}, context, device, result);
+ }
++#ifdef TFLITE_GPU_LIB_FIX
+ absl::Status ProgramCache::AddSerializedCache(
+     const CLContext& context, const CLDevice& device,
+     absl::Span<const uint8_t> serialized_cache) {
+@@ -143,6 +148,7 @@ absl::Status ProgramCache::GetSerializedCache(
+               builder.GetSize());
+   return absl::OkStatus();
+ }
++#endif
+ }  // namespace cl
+ }  // namespace gpu
+diff --git a/tensorflow/lite/delegates/gpu/common/types.h b/tensorflow/lite/delegates/gpu/common/types.h
+index 4ddb46f3..2b692f0b 100644
+--- a/tensorflow/lite/delegates/gpu/common/types.h
++++ b/tensorflow/lite/delegates/gpu/common/types.h
+@@ -34,9 +34,9 @@ class alignas(2) half {
+   HalfBits bits;
+   half() = default;
+-
++#ifdef TFLITE_GPU_LIB_FIX
+   half(const half& f) : bits(f.bits) {}
+-
++#endif
+   explicit half(float other) { bits = fp16_ieee_from_fp32_value(other); }
+   void operator=(float f) { *this = half(f); }
diff --git a/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake b/infra/cmake/packages/TensorFlowGpuSourceConfig.cmake
new file mode 100644 (file)
index 0000000..f1debe7
--- /dev/null
@@ -0,0 +1,74 @@
+#
+# Download TensorFlow 2.4.1; use only the GPU delegate code
+#
+
+function(_TensorFlowGpuSource_Import)
+  SET(PATCH_FILE_CHECK "20211014")
+  SET(DATE_STAMP_PATH "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU.stamp")
+
+  set(PATCH_DONE FALSE)
+  if(EXISTS ${DATE_STAMP_PATH})
+    file(STRINGS ${DATE_STAMP_PATH} OBTAINED_CONTENT)
+    if(${OBTAINED_CONTENT} STREQUAL "${PATCH_FILE_CHECK}")
+      set(PATCH_DONE "TRUE")
+    endif()
+  endif()
+  
+  if(${PATCH_DONE} STREQUAL "TRUE")
+    message(STATUS "Skip downloading TensorFlowGpuSource")
+    set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU" PARENT_SCOPE)
+    set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
+    set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
+    return()
+  else(${PATCH_DONE} STREQUAL "TRUE")
+    # PATCH_DONE FALSE
+    message(STATUS "TensorFlowGpuSource patch not found!")
+  endif(${PATCH_DONE} STREQUAL "TRUE")
+
+  # Download TFLite Source Code
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+  envoption(TENSORFLOW_2_4_1_URL https://github.com/tensorflow/tensorflow/archive/v2.4.1.tar.gz)
+  ExternalSource_Download(TFLITE_GPU_DELEGATE DIRNAME TENSORFLOW-2.4.1 ${TENSORFLOW_2_4_1_URL})
+
+  # Patch out code unused by the onert gpu_cl backend
+  # TODO: Simplify this
+  set(TENSORFLOWGPU_SOURCE_DIR "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU")
+
+  # remove & copy gpu delegate source codes only
+  if(EXISTS ${TENSORFLOWGPU_SOURCE_DIR})
+    file(REMOVE_RECURSE "${TENSORFLOWGPU_SOURCE_DIR}")
+  endif()
+
+  file(MAKE_DIRECTORY "${TENSORFLOWGPU_SOURCE_DIR}")
+  execute_process(
+    WORKING_DIRECTORY "${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
+    COMMAND bash -c "cp -r --parents ./tensorflow/lite/delegates/gpu ../TENSORFLOW_GPU"
+  )
+
+  # Create Stamp
+  set(_remove_path "${TENSORFLOWGPU_SOURCE_DIR}.stamp")
+  if(EXISTS ${_remove_path})
+    file(REMOVE ${_remove_path})
+  endif()
+  execute_process(
+    WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}/TENSORFLOW_GPU"
+    COMMAND bash -c "patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/TensorFlowGpuSource/patch_for_gpu_cl_build.patch"
+  )
+  file(WRITE ${DATE_STAMP_PATH} "${PATCH_FILE_CHECK}")
+  set(TENSORFLOWGPU_SOURCE_DIR "${TENSORFLOWGPU_SOURCE_DIR}" PARENT_SCOPE)
+  set(TensorFlowGpuSource_DIR "${TensorFlowGpuSource_DIR}" PARENT_SCOPE)
+  set(TensorFlowGpuSource_FOUND TRUE PARENT_SCOPE)
+
+  execute_process(
+    WORKING_DIRECTORY "${NNAS_EXTERNALS_DIR}"
+    COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}.stamp"
+    COMMAND bash -c "rm -rf ${TFLITE_GPU_DELEGATE_SOURCE_DIR}"
+  )
+endfunction(_TensorFlowGpuSource_Import)
+
+if(NOT TensorFlowGpuSource_FOUND)
+   _TensorFlowGpuSource_Import()
+else()
+  set(TensorFlowGpuSource_FOUND FALSE PARENT_SCOPE)
+endif(NOT TensorFlowGpuSource_FOUND)
index c35617497e85a007d1986c588fc8576cb5066869..a57d7f4cbb111bb9107f1cb39e86f9eeb1cec4cc 100644 (file)
@@ -1,7 +1,9 @@
 # NOTE The followings SHOULD be defined before using this CMakeLists.txt
+# NOTE TensorFlow 1.13.1 uses flatbuffers-1.10
+#      but we use flatbuffers-2.0 to match with all other modules flatbuffers version.
 #
 #  'TensorFlowSource_DIR' variable
-#  'FlatBuffersSource_DIR' variable
+#  'flatbuffers-2.0' target
 #  'eigen' target
 #  'gemmlowp' target
 #  'neon2sse' target
@@ -37,10 +39,9 @@ CHECK_CXX_COMPILER_FLAG(-Wno-extern-c-compat COMPILER_SUPPORT_EXTERN_C_COMPAT_WA
 add_library(tensorflowlite-1.13.1 ${SRCS})
 set_target_properties(tensorflowlite-1.13.1 PROPERTIES POSITION_INDEPENDENT_CODE ON)
 target_include_directories(tensorflowlite-1.13.1 PUBLIC ${TensorFlowSource_DIR})
-target_include_directories(tensorflowlite-1.13.1 PUBLIC ${FlatBuffersSource_DIR}/include)
 target_compile_options(tensorflowlite-1.13.1 PUBLIC -Wno-ignored-attributes)
 if(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
   target_compile_options(tensorflowlite-1.13.1 PUBLIC -Wno-extern-c-compat)
 endif(COMPILER_SUPPORT_EXTERN_C_COMPAT_WARNING)
 target_compile_definitions(tensorflowlite-1.13.1 PUBLIC "GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK")
-target_link_libraries(tensorflowlite-1.13.1 eigen gemmlowp neon2sse farmhash abseil dl)
+target_link_libraries(tensorflowlite-1.13.1 flatbuffers-2.0 eigen gemmlowp neon2sse farmhash abseil dl)
index 2c6bd9f7a18b1038134b94cf12b33ecb2b583ee4..ea20658508d7712d5d01136cd154534c6cdf00ee 100644 (file)
@@ -6,12 +6,12 @@ function(_TensorFlowLite_import)
     return()
   endif(NOT TensorFlowSource_FOUND)
 
-  nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
+  nnas_find_package(FlatBuffers EXACT 2.0 QUIET)
 
-  if(NOT FlatBuffersSource_FOUND)
+  if(NOT FlatBuffers_FOUND)
     set(TensorFlowLite_FOUND FALSE PARENT_SCOPE)
     return()
-  endif(NOT FlatBuffersSource_FOUND)
+  endif(NOT FlatBuffers_FOUND)
 
   nnas_find_package(Farmhash QUIET)
 
diff --git a/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt b/infra/cmake/packages/TensorFlowLiteGpu/CMakeLists.txt
new file mode 100644 (file)
index 0000000..c69e0bb
--- /dev/null
@@ -0,0 +1,72 @@
+#
+# Tensorflow Lite GPU delegate library 2.4.1
+#
+
+set(LIB_TENSORFLOW_GPU_DELEGATE "TensorFlowGpu")
+
+#TENSORFLOWGPU_SOURCE_DIR
+set(REF_TENSORFLOW_SRC_BASE ${TENSORFLOWGPU_SOURCE_DIR})
+set(REF_TENSORFLOW_LITE_SRC_BASE ${REF_TENSORFLOW_SRC_BASE}/tensorflow/lite)
+set(REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE "${REF_TENSORFLOW_LITE_SRC_BASE}/delegates/gpu")
+
+set(SRC_BASE "${REF_TENSORFLOW_LITE_GPU_DELEGATE_SRC_BASE}")
+file(GLOB GPU_CL_SRC_LIST   "${SRC_BASE}/cl/*.cc"
+                                    "${SRC_BASE}/cl/kernels/*.cc"
+                                    "${SRC_BASE}/cl/kernels/special/*.cc"
+                                    "${SRC_BASE}/cl/kernels/selectors/*.cc"
+                                    "${SRC_BASE}/cl/selectors/*.cc"
+                                    "${SRC_BASE}/common/*.cc"
+# Available, but not needed yet
+#                                    "${SRC_BASE}/common/default/*.cc"
+#                                    "${SRC_BASE}/common/memory_managements/*.cc"
+#                                    "${SRC_BASE}/common/transformations/*.cc"
+                                     )
+
+file(GLOB GPU_CL_HDRS_GLOB   "${SRC_BASE}/cl/*.h"
+                                    "${SRC_BASE}/cl/kernels/*.h"
+                                    "${SRC_BASE}/cl/kernels/special/*.h"
+                                    "${SRC_BASE}/cl/kernels/selectors/*.h"
+                                    "${SRC_BASE}/cl/selectors/*.h"
+                                    "${SRC_BASE}/common/*.h"
+                                    "${SRC_BASE}/common/default/*.h"
+                                    "${SRC_BASE}/common/memory_managements/*.h"
+                                    "${SRC_BASE}/common/transformations/*.h"
+                                    )
+list(APPEND GPU_CL_SRC_LIST "${_GPU_CL_HDRS_GLOB}")
+
+file(GLOB REMOVE_TEST_SRCS          "${SRC_BASE}/cl/*_test*.cc"
+                                    "${SRC_BASE}/cl/testing/*.cc"
+                                    "${SRC_BASE}/cl/kernels/*_test*.cc"
+                                    "${SRC_BASE}/common/*_test*.cc"
+                                    "${SRC_BASE}/common/transformations/*_test*.cc"
+                                    )
+# Not available
+file(GLOB REMOVE_SRCS               "${SRC_BASE}/cl/*gl*.cc"
+                                    "${SRC_BASE}/cl/gpu_api_delegate.cc"
+                                    "${SRC_BASE}/cl/serialization.cc"
+                                    "${SRC_BASE}/common/lstm_parser.cc"
+                                    "${SRC_BASE}/common/model_builder.cc"
+                                    "${SRC_BASE}/common/model_builder_helper.cc"
+                                    "${SRC_BASE}/common/object_reader.cc"
+                                    "${SRC_BASE}/common/quantization_util.cc"
+                                    "${SRC_BASE}/common/memory_management/*_test.cc"
+                                    )
+
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_TEST_SRCS})
+list(REMOVE_ITEM GPU_CL_SRC_LIST ${REMOVE_SRCS})
+list(APPEND TFLITE_GPU_SRCS ${GPU_CL_SRC_LIST})
+
+add_library(${LIB_TENSORFLOW_GPU_DELEGATE} STATIC ${TFLITE_GPU_SRCS})
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${Fp16Source_DIR}/include")
+target_include_directories(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "${TENSORFLOWGPU_SOURCE_DIR}")
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE abseil farmhash fp16)
+
+add_library(tflitegpu_ignore_warnings INTERFACE)
+target_compile_options(tflitegpu_ignore_warnings INTERFACE -Wno-unused-parameter -Wno-sign-compare)
+target_link_libraries(${LIB_TENSORFLOW_GPU_DELEGATE} INTERFACE tflitegpu_ignore_warnings)
+
+# GL codes are not used on gpu_cl
+target_compile_options(${LIB_TENSORFLOW_GPU_DELEGATE} PRIVATE "-DCL_DELEGATE_NO_GL")
+
+# Applying PIC first, currently used on gpu_cl only
+set_target_properties(${LIB_TENSORFLOW_GPU_DELEGATE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfig.cmake
new file mode 100644 (file)
index 0000000..4abe2ea
--- /dev/null
@@ -0,0 +1,18 @@
+function(_TensorFlowSource_import)
+  if(NOT DOWNLOAD_TENSORFLOW)
+    set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_TENSORFLOW)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  envoption(TENSORFLOW_2_8_0_URL https://github.com/tensorflow/tensorflow/archive/v2.8.0.tar.gz)
+
+  ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.8.0 ${TENSORFLOW_2_8_0_URL})
+
+  set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.8.0/TensorFlowSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..2ad2e24
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
index 75e6184d32ef8f1489998fdb1dd73e99d02fe141..5cf9606fa692dd50bb14e7358fce4fce466995a7 100644 (file)
@@ -4,6 +4,7 @@ INVALID_EXIT=0
 FILES_TO_CHECK=()
 DIRECTORIES_TO_BE_TESTED=()
 DIRECTORIES_NOT_TO_BE_TESTED=()
+DEFAULT_CLANG_FORMAT="clang-format-8"
 CLANG_FORMAT_CANDIDATES=()
 PATCH_FILE=format.patch
 CHECK_DIFF_ONLY="0"
@@ -16,7 +17,7 @@ function Usage()
   echo "If <file>s are given, it reformats the files"
   echo ""
   echo "Options:"
-  echo "      --clang-format <TOOL>     clang format bin (default: clang-format-3.9, clang-format)"
+  echo "      --clang-format <TOOL>     clang format bin (default: $DEFAULT_CLANG_FORMAT)"
   echo "      --diff-only               check diff files with master"
   echo "      --staged-only             check git staged files"
 }
@@ -65,39 +66,14 @@ function command_exists() {
   command -v $1 > /dev/null 2>&1
 }
 
-function exclude_symbolic_links() {
-  # Check all files (CMakeLists.txt, *.cl, ... not only for C++, Python)
-  if [[ ${#FILES_TO_CHECK} -ne 0 ]]; then
-    FILES_EXCLUDE_SYMLINKS=$(file ${FILES_TO_CHECK} | grep -v "symbolic link" | cut -d':' -f1)
-    FILES_TO_CHECK=${FILES_EXCLUDE_SYMLINKS}
-  fi
-}
-
 function check_newline() {
-  FILES_TO_CHECK_CR=()
-  for f in ${FILES_TO_CHECK[@]}; do
-    # Manually ignore style checking
-    if [[ ${f} == !(*.svg|*.pdf|*.png) ]]; then
-      FILES_TO_CHECK_CR+=("${f}")
-    fi
-  done
+  # Exclude binary files (see the .gitattributes file)
+  # TODO Remove the .svg exclusion
+  #   .svg: XML-based markup for vector graphics
+  FILES_TO_CHECK_EOF=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep -v '((\.caffemodel)|(\.png)|(\.pdf)|(\.h5)|(\.pdf)|(\.tar.gz)|(\.tflite)|(\.pdf)|(\.bmp)|(\.svg))$'`
 
-  # Check all files (CMakeLists.txt, *.cl, ... not only for C++, Python)
-  if [[ ${#FILES_TO_CHECK_CR} -ne 0 ]]; then
-    CRCHECK=$(file ${FILES_TO_CHECK_CR} | grep 'with CR')
-  else
-    return
-  fi
-  FILES_TO_FIX=($(echo "$CRCHECK" | grep "with CRLF line" | cut -d':' -f1))
-  for f in ${FILES_TO_FIX[@]}; do
-    tr -d '\r' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
-  done
-  FILES_TO_FIX=($(echo "${CRCHECK}" | grep "with CR line" | cut -d':' -f1))
-  for f in ${FILES_TO_FIX[@]}; do
-    tr '\r' '\n' < $f > $f.fixed && cat $f.fixed > $f && rm $f.fixed
-  done
-  # Check no new line at end of file
-  for f in ${FILES_TO_CHECK_CR[@]}; do
+  for f in ${FILES_TO_CHECK_EOF[@]}; do
+    # Check no new line at end of file
     if diff /dev/null "$f" | tail -1 | grep '^\\ No newline' > /dev/null; then
       echo >> "$f"
     fi
@@ -106,23 +82,19 @@ function check_newline() {
 
 function check_permission() {
   # Check all files except script
-  FILES_TO_CHECK_PERMISSION=()
-  for f in ${FILES_TO_CHECK[@]}; do
-    # Manually ignore permission checking
-    if [[ ${f} == !(nnas|nnfw|nncc|*.sh|*.py|*/gradlew|infra/debian/compiler/rules|infra/debian/runtime/rules) ]] \
-           || [[ ${f} == tests/nnapi/specs/**/*.py ]]; then
-      FILES_TO_CHECK_PERMISSION+=("${f}")
-    fi
-  done
+  # Manually ignore permission checking
+  FILES_TO_CHECK_PERMISSION=$(git ls-files -c -s --exclude-standard ${FILES_TO_CHECK[@]} | egrep '^100755' | cut -f2)
+  FILES_TO_CHECK_PERMISSION=`echo "$FILES_TO_CHECK_PERMISSION" | tr ' ' '\n' | egrep -v '((^nnas)|(^nnfw)|(^nncc)|(\.sh)|(\.py)|(/gradlew))$'`
+  FILES_TO_CHECK_PERMISSION=`echo "$FILES_TO_CHECK_PERMISSION" | egrep -v '((^infra/debian/compiler/rules)|(^infra/debian/runtime/rules))$'`
+  FILES_TO_CHECK_PERMISSION+=`echo && echo "$FILES_TO_CHECK" | egrep '^tests/nnapi/specs/.*.py$'`
+  # Transform to array
+  FILES_TO_CHECK_PERMISSION=($FILES_TO_CHECK_PERMISSION)
 
   if [[ ${#FILES_TO_CHECK_PERMISSION} -eq 0 ]]; then
     return
   fi
-  for FILE_TO_CHECK in ${FILES_TO_CHECK_PERMISSION[@]}; do
-    RESULT=$(stat -c '%A' ${FILE_TO_CHECK} | grep 'x')
-    if [ "${RESULT}" != "" ]; then
-      chmod a-x ${FILE_TO_CHECK}
-    fi
+  for f in ${FILES_TO_CHECK_PERMISSION[@]}; do
+    chmod a-x $f
   done
 }
 
@@ -132,7 +104,7 @@ function check_cpp_files() {
     return
   fi
 
-  CLANG_FORMAT_CANDIDATES+=("clang-format-8")
+  CLANG_FORMAT_CANDIDATES+=($DEFAULT_CLANG_FORMAT)
   for CLANG_FORMAT_CANDIDATE in ${CLANG_FORMAT_CANDIDATES[@]}; do
     if command_exists ${CLANG_FORMAT_CANDIDATE} ; then
       CLANG_FORMAT="${CLANG_FORMAT_CANDIDATE}"
@@ -141,25 +113,18 @@ function check_cpp_files() {
   done
 
   if [[ -z ${CLANG_FORMAT}  ]]; then
-    echo "[ERROR] clang-format-8 is unavailable"
+    echo "[ERROR] $CLANG_FORMAT is unavailable"
     echo
-    echo "        Please install clang-format-8 before running format check"
+    echo "        Please install $DEFAULT_CLANG_FORMAT before running format check"
     exit 1
   fi
 
-  # Check c++ files
-  FILES_TO_CHECK_CPP=()
-  for f in ${FILES_TO_CHECK[@]}; do
-    # Manually ignore style checking
-    if [[ ${f} == +(*/NeuralNetworks.h|*/NeuralNetworksExtensions.h) ]]; then
-      continue
-    fi
-
-    # File extension to check
-    if [[ ${f} == +(*.h|*.hpp|*.cpp|*.cc|*.c|*.cl) ]]; then
-      FILES_TO_CHECK_CPP+=("${f}")
-    fi
-  done
+  # Check c++ files: replace ' ' with newline, check with grep
+  FILES_TO_CHECK_CPP=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '((\.c[cl]?)|(\.cpp)|(\.h(pp)?))$'`
+  # Manually ignore style checking
+  FILES_TO_CHECK_CPP=`echo "$FILES_TO_CHECK_CPP" | egrep -v '((/NeuralNetworks\.h)|(/NeuralNetworksExtensions\.h))$'`
+  # Transform to array
+  FILES_TO_CHECK_CPP=($FILES_TO_CHECK_CPP)
 
   # Skip by '.FORMATDENY' file
   for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
@@ -189,21 +154,12 @@ function check_python_files() {
   fi
 
   # Check python files
-  FILES_TO_CHECK_PYTHON=()
-  for f in ${FILES_TO_CHECK[@]}; do
-    # File extension to check
-    if [[ ${f} == *.py ]]; then
-      FILES_TO_CHECK_PYTHON+=("${f}")
-    fi
-    # Exceptional case: one-cmds don't have '.py' extension
-    if [[ ${f} == compiler/one-cmds/* ]]; then
-      # Ignore non-python source (cmake, etc)
-      # Ignore shell script: one-prepare-venv
-      if [[ ${f} != compiler/one-cmds/*.* ]] && [[ ${f} != compiler/one-cmds/one-prepare-venv ]]; then
-        FILES_TO_CHECK_PYTHON+=("${f}")
-      fi
-    fi
-  done
+  FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK" | tr ' ' '\n' | egrep '\.py$'`
+  # Exceptional case: one-cmds don't have '.py' extension: ignore non-python source (cmake, etc) and ignore shell script: one-prepare-venv
+  FILES_TO_CHECK_PYTHON=`echo "$FILES_TO_CHECK_PYTHON" | egrep -v '^compiler/one-cmds/.*\..*$' | egrep -v '^compiler/one-cmds/one-prepare-venv$'`
+  # Transform to array
+  FILES_TO_CHECK_PYTHON=($FILES_TO_CHECK_PYTHON)
+
   for s in ${DIRECTORIES_NOT_TO_BE_TESTED[@]}; do
     skip=${s#'.'/}/
     FILES_TO_CHECK_PYTHON=(${FILES_TO_CHECK_PYTHON[*]/$skip*/})
@@ -228,7 +184,13 @@ fi
 __Check_CPP=${CHECK_CPP:-"1"}
 __Check_PYTHON=${CHECK_PYTHON:-"1"}
 
-FILES_TO_CHECK=$(git ls-files -c --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]})
+# Git file mode
+#   120000: symbolic link
+#   160000: git link
+#   100755: regular executable
+#   100644: regular readable
+# Reference: https://github.com/git/git/blob/cd42415/Documentation/technical/index-format.txt#L72-L81
+FILES_TO_CHECK=$(git ls-files -c -s --exclude-standard ${DIRECTORIES_TO_BE_TESTED[@]} | egrep -v '^1[26]0000' | cut -f2)
 if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
   MASTER_EXIST=$(git rev-parse --verify master)
   CURRENT_BRANCH=$(git branch | grep \* | cut -d ' ' -f2-)
@@ -243,6 +205,7 @@ if [[ "${CHECK_DIFF_ONLY}" = "1" ]]; then
     else
       FILES_TO_CHECK=$(git diff --name-only --diff-filter=d HEAD~${DIFF_COMMITS})
     fi
+    FILES_TO_CHECK=$(git ls-files -c -s --exclude-standard ${FILES_TO_CHECK[@]} | egrep -v '^1[26]0000' | cut -f2)
   fi
 fi
 
@@ -250,7 +213,6 @@ for DIR_NOT_TO_BE_TESTED in $(git ls-files -co --exclude-standard '*/.FORMATDENY
   DIRECTORIES_NOT_TO_BE_TESTED+=($(dirname "${DIR_NOT_TO_BE_TESTED}"))
 done
 
-exclude_symbolic_links
 check_newline
 check_permission
 check_cpp_files
index 99fa479f298b91a9436995583db0ade621ede50a..b3a3c1bf72a7a9ce3d6f3a2ea57d19f880b19da6 100644 (file)
@@ -2,14 +2,14 @@ Source: one
 Section: devel
 Priority: extra
 Maintainer: Neural Network Acceleration Solution Developers <nnfw@samsung.com>
-Build-Depends: cmake, debhelper (>=9), dh-python, python3-all
+Build-Depends: cmake, debhelper (>=9), dh-python, python3-all, python3.8, python3.8-venv
 Standards-Version: 3.9.8
 Homepage: https://github.com/Samsung/ONE
 
 Package: one-compiler
 Architecture: amd64
 Multi-Arch: foreign
-Depends: ${misc:Depends}, ${shlibs:Depends}, python3-venv, python3-pip
+Depends: ${misc:Depends}, ${shlibs:Depends}, python3-venv, python3-pip, python3.8, python3.8-venv
 Description: On-device Neural Engine compiler package
 
 Package: one-compiler-dev
index cbca4780222a2a3e205755eaadd3db9138c374f2..805ba8677c9adc63b2763557e3518b2f790206ce 100644 (file)
@@ -1,7 +1,7 @@
 # {FILES_TO_INSTALL} {DEST_DIR}
 # bin
 usr/bin/circle2circle usr/share/one/bin/
-usr/bin/circle_partitioner usr/share/one/bin/
+usr/bin/circle-partitioner usr/share/one/bin/
 usr/bin/circle-quantizer usr/share/one/bin/
 usr/bin/generate_bcq_metadata.py usr/share/one/bin/
 usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
@@ -22,6 +22,8 @@ usr/bin/one-prepare-venv usr/share/one/bin/
 usr/bin/one-profile usr/share/one/bin/
 usr/bin/one-quantize usr/share/one/bin/
 usr/bin/one-version usr/share/one/bin/
+usr/bin/onelib/constant.py usr/share/one/bin/onelib/
+usr/bin/onelib/make_cmd.py usr/share/one/bin/onelib/
 usr/bin/rawdata2hdf5 usr/share/one/bin/
 usr/bin/record-minmax usr/share/one/bin/
 usr/bin/tf2nnpkg usr/share/one/bin/
index a339d06c1be78e00f02fc5016334e76f9c9457e6..d84e8e0429c46989de0e286733e4d76b964c5f78 100644 (file)
@@ -9,4 +9,4 @@ set -e
 # which causes invalid permission problem.
 # e.g. When `pip` installs user packages, it proceeds based on $HOME.
 # To proper installation, $HOME should be root.
-su - $(whoami) -c '/usr/share/one/bin/one-prepare-venv' # $(whoami) = root
+su - $(whoami) -p -c '/usr/share/one/bin/one-prepare-venv' # $(whoami) = root
index e42faae09557411edd329384ad03c113957ea686..145634d030d5289899e3a629b0d0b278ffd21656 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/make -f
 export DH_VERBOSE = 1
 export NNAS_BUILD_PREFIX = build
-export PRESET = 20210910
+export PRESET = 20220323
 export _DESTDIR = debian/tmp/usr
 
 %:
index c3d5b3e950b04cb0297fa3a6f02ac9e9b952bbef..dbc22a6e84695658f4b97a3a95f0d497876f3f6a 100644 (file)
@@ -41,9 +41,12 @@ RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
 # Additonal tools
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive \
-    apt-get -qqy install doxygen graphviz wget zip unzip clang-format-3.9 clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
+    apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
+RUN apt-get update && apt-get -qqy install python3.8 python3.8-venv
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy
+RUN python3.8 -m pip install --upgrade pip
+RUN python3.8 -m pip install numpy
 
 # Install google test (source)
 RUN apt-get update && apt-get -qqy install libgtest-dev
index 45faa6975ec1adcd42e26c98d1fd82744e3049ea..6f3cd9b609794e7b16f72623bcffd945f9852825 100644 (file)
@@ -31,12 +31,12 @@ RUN apt-get update && apt-get -qqy install libboost-all-dev libgflags-dev libgoo
 # Install protocol buffer
 RUN apt-get update && apt-get -qqy install libprotobuf-dev protobuf-compiler
 
-# Additonal tools (except clang-format-3.9)
+# Additonal tools
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive \
     apt-get -qqy install doxygen graphviz wget zip unzip clang-format-8 python3 python3-pip python3-venv hdf5-tools pylint curl
-RUN pip3 install --upgrade pip
-RUN pip3 install yapf==0.22.0 numpy
+RUN python3 -m pip install --upgrade pip
+RUN python3 -m pip install yapf==0.22.0 numpy
 
 # Install google test (source)
 RUN apt-get update && apt-get -qqy install libgtest-dev
index bde68493807e75395890a1d87b885dd16846116d..2ff5a5f6af8964483b2d521bbe4cfd9dfe2f3130 100644 (file)
@@ -40,12 +40,19 @@ macro(nnas_include PREFIX)
 endmacro(nnas_include)
 
 macro(nnas_find_package PREFIX)
-  find_package(${PREFIX} CONFIG NO_DEFAULT_PATH
-    PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
-    ${ARGN}
-  )
+  find_package(${PREFIX}
+               CONFIG NO_DEFAULT_PATH
+               PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages
+               ${ARGN})
 endmacro(nnas_find_package)
 
+macro(nnas_find_package_folder PREFIX FIND_FOLDER)
+  find_package(${PREFIX}
+               CONFIG NO_DEFAULT_PATH
+               PATHS ${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/packages ${FIND_FOLDER}
+               ${ARGN})
+endmacro(nnas_find_package_folder)
+
 # nncc_find_resource(NAME) will update the following variables
 #
 #   NAME_FOUND
@@ -81,31 +88,12 @@ message(STATUS "Use '${CMAKE_BUILD_TYPE}' configuration")
 #
 set(THREADS_PREFER_PTHREAD_FLAG TRUE)
 
-###
-### Configuration
-###
-option(DOWNLOAD_PROTOBUF "Download Protocol Buffer source" ON)
-option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source" ON)
-option(DOWNLOAD_EIGEN "Download Eigen source" ON)
-option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
-option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
-option(DOWNLOAD_RUY "Download ruy source" ON)
-option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
-option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
-option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
-option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
-option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
-option(DOWNLOAD_CAFFE "Download Caffe source" ON)
-option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
-option(DOWNLOAD_ONNX "Download ONNX source" ON)
-option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
-option(DOWNLOAD_OPENCL_HEADERS "Download OpenCl Header source" ON)
-option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
-
-option(DOWNLOAD_GTEST "Download Google Test source" ON)
-option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
-option(DOWNLOAD_HDF5 "Download HDF5 source" ON)
-option(BUILD_HDF5 "Build HDF5 from the downloaded source" ON)
+# identify platform: HOST_PLATFORM, TARGET_PLATFORM and related
+# note: this should be placed before flags and options setting
+nnas_include(IdentifyPlatform)
+
+# Configuration flags
+include("cmake/CfgOptionFlags.cmake")
 
 nnas_find_package(GTest QUIET)
 
@@ -124,16 +112,9 @@ if(${ENABLE_TEST})
   include(CTest)
 endif(${ENABLE_TEST})
 
-option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
-
-# This option might be turned ON for Windows native build.
-# Check our ProtobufConfig.cmake for its usage.
-option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
-
-# This option might be turned ON for MCU builds of luci related components.
-# It specify which library type to use for build:
-# if set ON - luci libraries are static, otherwise - shared.
-option(STATIC_LUCI "Build luci as a static libraries" OFF)
+# apply compilation flags
+# NOTE this should be after all option
+include("cmake/ApplyCompileFlags.cmake")
 
 ###
 ### Target
diff --git a/infra/nncc/Makefile.arm32 b/infra/nncc/Makefile.arm32
new file mode 100644 (file)
index 0000000..22d96e7
--- /dev/null
@@ -0,0 +1,146 @@
+#
+# NOTE this is provided as experimental Makefile to ARM32 cross building
+#      some modules of compiler.
+#
+
+BUILD_TYPE?=Debug
+
+CURRENT_DIR=$(shell pwd)
+BUILDFOLDER=build
+ARM32_FOLDER=arm32
+ROOTFS_ARM?=$(CURRENT_DIR)/tools/cross/rootfs/arm
+NNCC_CFG_OPTION_EXTRA?=
+
+TYPE_FOLDER=$(shell echo $(BUILD_TYPE) | tr A-Z a-z)
+
+BUILD_ARM32_FOLDER=$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER)
+BUILD_ARM32_HOST=$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).host
+
+ARM32_INSTALL_FOLDER=$(CURRENT_DIR)/$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).install
+ARM32_INSTALL_HOST=$(CURRENT_DIR)/$(BUILDFOLDER)/$(ARM32_FOLDER).$(TYPE_FOLDER).host.install
+
+# ARM32 build
+ARM32_BUILD_ITEMS:=angkor;cwrap;pepper-str;pepper-strcast;pp
+ARM32_BUILD_ITEMS+=;pepper-csv2vec;crew
+ARM32_BUILD_ITEMS+=;oops;pepper-assert
+ARM32_BUILD_ITEMS+=;hermes;hermes-std
+ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo
+ARM32_BUILD_ITEMS+=;safemain;mio-circle04;mio-tflite280
+ARM32_BUILD_ITEMS+=;dio-hdf5
+ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone
+ARM32_BUILD_ITEMS+=;luci
+ARM32_BUILD_ITEMS+=;luci-interpreter
+ARM32_BUILD_ITEMS+=;tflite2circle
+ARM32_BUILD_ITEMS+=;tflchef;circlechef
+ARM32_BUILD_ITEMS+=;circle2circle;record-minmax;circle-quantizer
+ARM32_BUILD_ITEMS+=;luci-eval-driver;luci-value-test
+
+ARM32_TOOLCHAIN_FILE=cmake/buildtool/cross/toolchain_armv7l-linux.cmake
+
+ARM32_HOST_ITEMS:=angkor;cwrap;pepper-str;pepper-strcast;pp
+ARM32_HOST_ITEMS+=;pepper-csv2vec
+ARM32_HOST_ITEMS+=;oops
+ARM32_HOST_ITEMS+=;hermes;hermes-std
+ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo
+ARM32_HOST_ITEMS+=;safemain;mio-circle04;mio-tflite280
+ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone
+ARM32_HOST_ITEMS+=;luci
+ARM32_HOST_ITEMS+=;luci-interpreter
+ARM32_HOST_ITEMS+=;tflite2circle
+ARM32_HOST_ITEMS+=;tflchef;circlechef
+ARM32_HOST_ITEMS+=;circle-tensordump
+ARM32_HOST_ITEMS+=;circle2circle
+ARM32_HOST_ITEMS+=;common-artifacts
+ARM32_HOST_ITEMS+=;luci-eval-driver;luci-value-test
+
+
+_SPACE_:=
+_SPACE_+=
+ARM32_BUILD_WHITELIST=$(subst $(_SPACE_),,$(ARM32_BUILD_ITEMS))
+ARM32_HOST_WHITELIST=$(subst $(_SPACE_),,$(ARM32_HOST_ITEMS))
+
+NNCC_CFG_OPTION+= -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_COVERAGE=OFF
+
+NNCC_CFG_STRICT= -DENABLE_STRICT_BUILD=ON
+
+INT_TARGETS:=int_configure_arm32 int_configure_arm32_host \
+  int_build_arm32 int_build_arm32_host int_test_arm32_host int_test
+
+NNCC_ARM32_DEBUG= -DBUILD_WHITELIST="$(ARM32_BUILD_WHITELIST)"
+NNCC_ARM32_DEBUG_HOST= -DBUILD_WHITELIST="$(ARM32_HOST_WHITELIST)"
+
+DEF_TARGETS:=all
+
+VAL_TARGETS:=cfg debug test_prep test
+
+.PHONY: $(INT_TARGETS) $(DEF_TARGETS) $(VAL_TARGETS)
+
+.DEFAULT_GOAL: help
+
+help:
+       @echo "cfg      : debug configure"
+       @echo "debug    : debug build"
+       @echo "test_prep: debug test preparation"
+       @echo "test     : debug test in target"
+
+###############################################################################
+# do not call int_xxxx directly as the depend on environment variables
+
+#
+# configures
+#
+
+int_configure_arm32_host:
+       NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc configure \
+               $(NNCC_CFG_OPTION) \
+               $(NNCC_ARM32_DEBUG_HOST) $(NNCC_CFG_STRICT) \
+               -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_HOST)" \
+               -DENABLE_TEST=ON
+
+int_configure_arm32:
+       ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
+       BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
+       NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc configure \
+               $(NNCC_CFG_OPTION) $(NNCC_CFG_OPTION_EXTRA) \
+               $(NNCC_ARM32_DEBUG) $(NNCC_CFG_STRICT) \
+               -DCMAKE_TOOLCHAIN_FILE=$(ARM32_TOOLCHAIN_FILE) \
+               -DCMAKE_INSTALL_PREFIX="$(ARM32_INSTALL_FOLDER)" \
+               -DENABLE_TEST=ON
+
+
+#
+# builds
+#
+int_build_arm32_host:
+       NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc build -j1
+
+int_build_arm32:
+       ROOTFS_DIR=$(ROOTFS_ARM) TARGET_ARCH=armv7l \
+       BUILD_HOST_EXEC=$(CURRENT_DIR)/$(BUILD_ARM32_HOST) \
+       NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc build -j1
+
+#
+# host test; run test in host to generate random input and expected outputs
+#
+int_test_arm32_host:
+       NNCC_WORKSPACE=$(BUILD_ARM32_HOST) ./nncc test
+
+#
+# tests: run in ARM32 Ubuntu 18.04 device
+#
+int_test:
+       NNCC_WORKSPACE=$(BUILD_ARM32_FOLDER) ./nncc test
+
+################################################################################
+
+all: int_configure_arm32_host int_build_arm32_host int_configure_arm32 int_build_arm32
+
+cfg: int_configure_arm32_host int_build_arm32_host int_configure_arm32
+
+debug: int_build_arm32
+
+# NOTE before run test in ARM32, run test in host is required to prepare test data
+test_prep: int_test_arm32_host
+
+# NOTE run test in ARM32 Ubuntu 18.04 device
+test: int_test
diff --git a/infra/nncc/cmake/ApplyCompileFlags.cmake b/infra/nncc/cmake/ApplyCompileFlags.cmake
new file mode 100644 (file)
index 0000000..0cc5f9c
--- /dev/null
@@ -0,0 +1,35 @@
+#
+# Platform independent compile flag setting
+#
+# flags for build type: debug, release
+set(CMAKE_C_FLAGS_DEBUG     "-O0 -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG   "-O0 -g -DDEBUG")
+set(CMAKE_C_FLAGS_RELEASE   "-O3 -DNDEBUG")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
+
+#
+# Platform specific compile flag setting
+#
+if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+  include("${CMAKE_CURRENT_LIST_DIR}/buildtool/config/config_${TARGET_PLATFORM}.cmake")
+endif()
+
+#
+# Apply compile flags
+# note: this should be placed after cmake/buildtool/config/config_xxx.cmake files
+#
+# add common flags
+foreach(FLAG ${FLAGS_COMMON})
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
+
+# add c flags
+foreach(FLAG ${FLAGS_CONLY})
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLAG}")
+endforeach()
+
+# add cxx flags
+foreach(FLAG ${FLAGS_CXXONLY})
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
+endforeach()
diff --git a/infra/nncc/cmake/CfgOptionFlags.cmake b/infra/nncc/cmake/CfgOptionFlags.cmake
new file mode 100644 (file)
index 0000000..773a1f7
--- /dev/null
@@ -0,0 +1,58 @@
+#
+# Platform specific configuration
+# note: this should be placed before default setting for option setting priority
+#       (platform specific setting have higher priority)
+#
+include("cmake/options/options_${TARGET_PLATFORM}.cmake")
+
+###
+### Configuration
+###
+option(DOWNLOAD_PROTOBUF "Download Protocol Buffer source" ON)
+option(BUILD_PROTOBUF "Locally build Protocol Buffer from the downloaded source" ON)
+option(DOWNLOAD_EIGEN "Download Eigen source" ON)
+option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
+option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
+option(DOWNLOAD_RUY "Download ruy source" ON)
+option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" ON)
+option(DOWNLOAD_GFLAGS "Download GFlags source" OFF)
+option(DOWNLOAD_FLATBUFFERS "Download FlatBuffers source" ON)
+option(BUILD_FLATBUFFERS "Locally build Flatbuffers from the downloaded source" ON)
+option(DOWNLOAD_TENSORFLOW "Download TensorFlow source" ON)
+option(DOWNLOAD_CAFFE "Download Caffe source" ON)
+option(DOWNLOAD_PYTORCH "Download Pytorch source" ON)
+option(DOWNLOAD_ONNX "Download ONNX source" ON)
+option(DOWNLOAD_ABSEIL "Download Abseil-cpp source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download OpenCl Header source" ON)
+option(DOWNLOAD_PYBIND11 "Download Pybind11 source" ON)
+option(DOWNLOAD_JSONCPP "Download Jsoncpp source" ON)
+
+option(DOWNLOAD_GTEST "Download Google Test source" ON)
+option(BUILD_GTEST "Build Google Test from the downloaded source" ON)
+option(DOWNLOAD_HDF5 "Download HDF5 source" ON)
+option(BUILD_HDF5 "Build HDF5 from the downloaded source" ON)
+
+option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
+
+# This option might be turned ON for Windows native build.
+# Check our ProtobufConfig.cmake for its usage.
+option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
+
+# This option might be turned ON for MCU builds of luci related components.
+# It specify which library type to use for build:
+# if set ON - luci libraries are static, otherwise - shared.
+option(STATIC_LUCI "Build luci as a static libraries" OFF)
+
+# Disable PIC(Position-Independent Code) option for luci-interpreter related components.
+# This option might be turned ON for MCU builds.
+#
+# Enabled PIC requires additional efforts for correct linkage, such as
+# implementation of trampoline functions and support of various address tables.
+# PIC is used for dynamic libraries, MCU builds of interpreter
+# do not benefit from it, so we prefer to disable PIC.
+option(NNCC_LIBRARY_NO_PIC "Disable PIC option for libraries" OFF)
+
+# one-cmds PyTorch importer is an experimental feature, it is not used in default configuration.
+# This option enables installation of one-import-pytorch utility and
+# generation of related testsuite.
+option(ENABLE_ONE_IMPORT_PYTORCH "Enable deploy of one-cmds pytoch importer and related tests" OFF)
diff --git a/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake b/infra/nncc/cmake/buildtool/config/config_armv7l-linux.cmake
new file mode 100644 (file)
index 0000000..c800f33
--- /dev/null
@@ -0,0 +1,24 @@
+#
+# armv7l linux compile options
+#
+
+message(STATUS "Building for ARMv7l Linux")
+
+# include linux common
+include("${CMAKE_CURRENT_LIST_DIR}/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+    "-mcpu=cortex-a7"
+    "-mfloat-abi=hard"
+    "-ftree-vectorize"
+    "-mfp16-format=ieee"
+    )
+
+if(BUILD_ARM32_NEON)
+  set(FLAGS_COMMON ${FLAGS_COMMON}
+      "-mfpu=neon-vfpv4"
+      )
+else(BUILD_ARM32_NEON)
+  message(STATUS "ARMv7l: NEON is disabled")
+endif(BUILD_ARM32_NEON)
diff --git a/infra/nncc/cmake/buildtool/config/config_linux.cmake b/infra/nncc/cmake/buildtool/config/config_linux.cmake
new file mode 100644 (file)
index 0000000..d7b17cf
--- /dev/null
@@ -0,0 +1,11 @@
+#
+# linux common compile options
+#
+
+# Disable annoying ABI compatibility warning.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
+  list(APPEND FLAGS_CXXONLY "-Wno-psabi")
+endif()
+
+# lib pthread as a variable (pthread must be disabled on android)
+set(LIB_PTHREAD pthread)
diff --git a/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake b/infra/nncc/cmake/buildtool/cross/toolchain_armv7l-linux.cmake
new file mode 100644 (file)
index 0000000..4956d91
--- /dev/null
@@ -0,0 +1,38 @@
+#
+# config for arm-linux
+#
+include(CMakeForceCompiler)
+
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR armv7l)
+
+set(CMAKE_C_COMPILER   arm-linux-gnueabihf-gcc)
+set(CMAKE_CXX_COMPILER arm-linux-gnueabihf-g++)
+
+# where is the target environment
+set(NNAS_PROJECT_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../../..")
+set(ROOTFS_ARM "${NNAS_PROJECT_SOURCE_DIR}/tools/cross/rootfs/arm")
+include("${NNAS_PROJECT_SOURCE_DIR}/infra/cmake/modules/OptionTools.cmake")
+
+envoption(ROOTFS_DIR ${ROOTFS_ARM})
+if(NOT EXISTS "${ROOTFS_DIR}/lib/arm-linux-gnueabihf")
+  message(FATAL_ERROR "Please prepare RootFS for ARM")
+endif()
+
+set(CMAKE_SYSROOT ${ROOTFS_DIR})
+set(CMAKE_SHARED_LINKER_FLAGS
+    "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
+    CACHE INTERNAL "" FORCE)
+set(CMAKE_EXE_LINKER_FLAGS
+    "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${ROOTFS_DIR}"
+    CACHE INTERNAL "" FORCE)
+
+# search for programs in the build host directories
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+
+# for libraries and headers in the target directories
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# Set cache variable to ignore try-run error by find_package(Threads REQUIRED) on cross build
+set(THREADS_PTHREAD_ARG "2" CACHE STRING "Result from TRY_RUN" FORCE)
diff --git a/infra/nncc/cmake/options/options_armv7l-linux.cmake b/infra/nncc/cmake/options/options_armv7l-linux.cmake
new file mode 100644 (file)
index 0000000..d1cc367
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# armv7l linux cmake options
+#
+
+option(BUILD_ARM32_NEON "Use NEON for ARM32 cross build" ON)
diff --git a/infra/nncc/cmake/options/options_x86_64-linux.cmake b/infra/nncc/cmake/options/options_x86_64-linux.cmake
new file mode 100644 (file)
index 0000000..0fb72f1
--- /dev/null
@@ -0,0 +1,3 @@
+#
+# x86_64 linux cmake options
+#
index 438d6b2752d51b7b8d1d6bd8c242552d6616b485..5371120ad0b9c4c31bf8f3736e4e95c364413cd1 100644 (file)
@@ -24,12 +24,13 @@ option(BUILD_NNAPI_TEST "Build nnapi_test" ON)
 option(BUILD_NNPACKAGE_RUN "Build nnpackge_run" ON)
 option(BUILD_TFLITE_LOADER "Build TensorFlow Lite loader" ON)
 option(BUILD_CIRCLE_LOADER "Build circle loader" ON)
+option(BUILD_TRIX_LOADER "Build trix loader" ON)
 option(BUILD_TFLITE_COMPARATOR_TEST_TOOL "Build tflite loader testing tool" ON)
 option(BUILD_WITH_HDF5 "Build test tool with HDF5 library" ON)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" ON)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" ON)
 option(INSTALL_TEST_SCRIPTS "Install test scripts" ON)
-option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
 #
 # Default build configuration for contrib
 #
@@ -54,8 +55,9 @@ option(BUILD_TFLITE_ACCURACY "Build tflite accuracy tool" OFF)
 # Default external libraries source download and build configuration
 #
 option(DOWNLOAD_TENSORFLOW "Download Tensorflow source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" OFF)
 option(DOWNLOAD_ABSEIL "Download Abseil source" ON)
-option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" OFF)
 option(DOWNLOAD_EIGEN "Download Eigen source" ON)
 option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
 option(DOWNLOAD_GEMMLOWP "Download GEMM low precesion library source" ON)
@@ -71,6 +73,7 @@ option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" ON)
 option(BUILD_BOOST "Build boost source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" ON)
 option(BUILD_TENSORFLOW_LITE_2_3_0 "Build TensorFlow Lite 2.3.0 from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" OFF)
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" ON)
 option(BUILD_RUY "Build ruy library from the downloaded source" ON)
 option(BUILD_CPUINFO "Build cpuinfo library from the downloaded source" ON)
diff --git a/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake b/infra/nnfw/cmake/buildtool/config/config_armv7hl-tizen.cmake
new file mode 100644 (file)
index 0000000..dec1b4a
--- /dev/null
@@ -0,0 +1,22 @@
+#
+# armv7l tizen compile options
+#
+
+message(STATUS "Building for ARMv7hl(hardfp) Tizen")
+
+# Build flag for tizen
+set(CMAKE_C_FLAGS_DEBUG     "-O -g -DDEBUG")
+set(CMAKE_CXX_FLAGS_DEBUG   "-O -g -DDEBUG")
+
+# TODO : add and use option_tizen if something uncommon comes up
+# include linux common
+include("cmake/buildtool/config/config_linux.cmake")
+
+# addition for arm-linux
+set(FLAGS_COMMON ${FLAGS_COMMON}
+    "-mtune=cortex-a8"
+    "-mfloat-abi=hard"
+    "-mfpu=neon-vfpv4"
+    "-funsafe-math-optimizations"
+    "-ftree-vectorize"
+    )
index 57d4c10613511a3bcfac38be2be880212d17facc..ed6e35ebb99c09d3b1c5ba378926238e908717e1 100644 (file)
@@ -4,6 +4,7 @@
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
diff --git a/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake b/infra/nnfw/cmake/options/options_armv7hl-tizen.cmake
new file mode 100644 (file)
index 0000000..aa2d2f8
--- /dev/null
@@ -0,0 +1,16 @@
+#
+# armv7hl tizen cmake options
+#
+option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
+option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
+option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
+
+option(BUILD_LOGGING "Build logging runtime" OFF)
+option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
+option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
index e10e573c4ce7531237e3c7d8185beddb7ca3db25..325e7cc588c308308bd9ced24057d1c0e1b24215 100644 (file)
@@ -3,3 +3,8 @@
 #
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
 option(BUILD_OPENCL_TOOL "Build OpenCL tool" ON)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
index c27a7ad01fc21d08c53fed3958ad812a5adddef6..eab3b0a92971637f9080266bf9d0fc01f2b02dcf 100644 (file)
@@ -4,7 +4,13 @@
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
 option(DOWNLOAD_NEON2SSE "Download NEON2SSE library source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
 option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OFF)
+
+option(DOWNLOAD_OPENCL_HEADERS "Download Opencl_headers source" ON)
+option(DOWNLOAD_TENSORFLOW_GPU "Download Tensorflow GPU delegate source" ON)
+option(BUILD_GPU_CL "Build gpu_cl backend" ON)
+option(BUILD_TENSORFLOW_LITE_GPU "Build TensorFlow Lite GPU delegate from the downloaded source" ON)
index 7a425f068ae9a481b52708dab9aea75a17c103f3..14a3d555b4b403790434b1227bbd52c167821c3f 100644 (file)
@@ -4,6 +4,7 @@
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
 option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
index 5dbcf7e086a4def599ebe6ea9f03dd774da88102..135cfbf6e985bb396a29db216cd7d4e2672fd2df 100644 (file)
@@ -4,5 +4,3 @@
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
 option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
 option(BUILD_XNNPACK "Build XNNPACK" OFF)
-option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
-option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
index 5dbcf7e086a4def599ebe6ea9f03dd774da88102..1cb72d5931285cbdbf14c4a2c3292ab5e14874c4 100644 (file)
@@ -2,7 +2,5 @@
 # x86_64 linux cmake options
 #
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
-option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
 option(BUILD_XNNPACK "Build XNNPACK" OFF)
-option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
-option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
+option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
index 0f1c86d232d60d6c385ba466a6a028061b244d28..31b7fd6fba3293d796a20548355368dbe7271a9e 100644 (file)
@@ -2,8 +2,8 @@
 # x86_64 linux cmake options
 #
 option(BUILD_ARMCOMPUTE "Build ARM Compute from the downloaded source" OFF)
-option(BUILD_TENSORFLOW_LITE "Build TensorFlow Lite from the downloaded source" OFF)
 option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" OFF)
+option(DOWNLOAD_GTEST "Download Google Test source and build Google Test" OFF)
 
 option(BUILD_LOGGING "Build logging runtime" OFF)
 option(GENERATE_RUNTIME_NNAPI_TESTS "Generate NNAPI operation gtest" OFF)
@@ -11,4 +11,3 @@ option(ENVVAR_ONERT_CONFIG "Use environment variable for onert configuration" OF
 
 option(BUILD_XNNPACK "Build XNNPACK" OFF)
 option(DOWNLOAD_OPENCL_HEADERS "Download opencl headers" OFF)
-option(BUILD_GPU_CL "Build gpu_cl backend" OFF)
index 99ee795c1239543344c55fe1a968258b996b413d..878026d9a7656aa1e7877ae122cb71f3c33becd2 100644 (file)
@@ -14,6 +14,8 @@ function(_CpuInfo_Build)
     return()
   endif(NOT CpuInfoSource_FOUND)
 
+  nnas_include(ExternalProjectTools)
+
   set(CPUINFO_BUILD_TOOLS OFF CACHE BOOL "Build command-line tools")
   set(CPUINFO_BUILD_BENCHMARKS OFF CACHE BOOL "Build cpuinfo unit tests")
   set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE BOOL "Build cpuinfo mock tests")
index d27ac1435a4e51180a9b71a6333bc952e9bcf2aa..032724ae2a47f0431f5a930ea8fc6d2e0c502ead 100644 (file)
@@ -6,8 +6,8 @@ function(_FlatBuffers_import)
     return()
   endif(Flatbuffers_FOUND)
 
-  # NOTE Tizen uses 1.12
-  nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET)
+  # NOTE Tizen uses 2.0
+  nnas_find_package(FlatBuffersSource EXACT 2.0 QUIET)
 
   if(NOT FlatBuffersSource_FOUND)
     set(FlatBuffers_FOUND FALSE PARENT_SCOPE)
index 54695531ea1b485199f3567f6541b30eed8e40ec..ab2b39e00e2e4c8871f21d075b3f8a77f029e997 100644 (file)
@@ -16,7 +16,11 @@ if(${DOWNLOAD_GTEST})
 endif(${DOWNLOAD_GTEST})
 
 ### Find and use pre-installed Google Test
-find_package(GTest)
+if(NOT GTest_FOUND)
+  # Reset package config directory cache to prevent recursive find
+  unset(GTest_DIR CACHE)
+  find_package(GTest)
+endif(NOT GTest_FOUND)
 find_package(Threads)
 
 if(${GTEST_FOUND} AND TARGET Threads::Threads)
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake b/infra/nnfw/cmake/packages/TRIXEngineConfig.cmake
new file mode 100644 (file)
index 0000000..dfc10eb
--- /dev/null
@@ -0,0 +1,42 @@
+# Looking for pre-installed TRIX engine package
+set(TRIX_ENGINE_PREFIX "/usr" CACHE PATH "Where to find TRIX engine header and library")
+
+function(_TRIXEngine_import)
+  # Find the header & lib
+  find_library(TRIXEngine_LIB
+    NAMES npu-engine
+    PATHS "${TRIX_ENGINE_PREFIX}/lib"
+  )
+
+  find_path(TRIXEngine_INCLUDE_DIR
+    NAMES libnpuhost.h
+    PATHS "${TRIX_ENGINE_PREFIX}/include/npu-engine"
+  )
+
+  set(TRIXEngine_FOUND TRUE)
+
+  if(NOT TRIXEngine_LIB)
+    set(TRIXEngine_FOUND FALSE)
+  endif(NOT TRIXEngine_LIB)
+
+  if(NOT TRIXEngine_INCLUDE_DIR)
+    set(TRIXEngine_FOUND FALSE)
+  endif(NOT TRIXEngine_INCLUDE_DIR)
+
+  if(NOT TRIXEngine_FOUND)
+    message(STATUS "Failed to find TRIX Engine")
+  else(NOT TRIXEngine_FOUND)
+
+    # Add target
+    if(NOT TARGET trix_engine)
+      add_library(trix_engine INTERFACE)
+      target_link_libraries(trix_engine INTERFACE ${TRIXEngine_LIB})
+      target_include_directories(trix_engine INTERFACE ${TRIXEngine_INCLUDE_DIR})
+    endif(NOT TARGET trix_engine)
+  endif(NOT TRIXEngine_FOUND)
+
+  set(TRIXEngine_FOUND ${TRIXEngine_FOUND} PARENT_SCOPE)
+  set(TRIXEngine_INCLUDE_DIRS ${TRIXEngine_INCLUDE_DIR} PARENT_SCOPE)
+endfunction(_TRIXEngine_import)
+
+_TRIXEngine_import()
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.cmake
new file mode 100644 (file)
index 0000000..0e0a043
--- /dev/null
@@ -0,0 +1,104 @@
+# This script need to set:
+#
+# VARIABLE                   | description
+# ---                        | ---
+# PACKAGE_VERSION            | full provided version string
+# PACKAGE_VERSION_EXACT      | true if version is exact match
+# PACKAGE_VERSION_COMPATIBLE | true if version is compatible
+# PACKAGE_VERSION_UNSUITABLE | true if unsuitable as any version
+#
+# Reference: https://cmake.org/cmake/help/v3.10/command/find_package.html
+
+set(TRIX_ENGINE_PREFIX "/usr" CACHE PATH "Where to find TRIX engine header and library")
+
+if(NOT PACKAGE_FIND_VERSION)
+  message(FATAL_ERROR "Please pass version requirement to use TRIX Engine dependency")
+endif()
+
+# Find the header & lib from TRIX_ENGINE_PREFIX
+find_library(TRIXEngine_LIB
+  NAMES npu-engine
+  HINTS "${TRIX_ENGINE_PREFIX}/lib"
+)
+find_path(TRIXEngine_INCLUDE_DIR
+  NAMES libnpuhost.h
+  HINTS "${TRIX_ENGINE_PREFIX}/include/npu-engine"
+)
+
+if(NOT TRIXEngine_INCLUDE_DIR OR NOT TRIXEngine_LIB)
+  set(PACKAGE_VERSION_EXACT FALSE)
+  set(PACKAGE_VERSION_COMPATIBLE FALSE)
+  set(PACKAGE_VERSION_UNSUITABLE TRUE)
+  return()
+endif(NOT TRIXEngine_INCLUDE_DIR OR NOT TRIXEngine_LIB)
+
+# TODO Assert TRIX_ENGINE_PREFIX is directory
+
+# TODO Can we run this only once per configure?
+try_run(MAJOR_VER MAJOR_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.major"
+  SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.major.cpp"
+  CMAKE_FLAGS
+  "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+  "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+if(NOT MAJOR_COMPILABLE)
+  # This means VERSION < 2.2.7
+  # `getVersion` API introduced from TRIX Engine 2.2.7
+  if(PACKAGE_FIND_VERSION VERSION_GREATER_EQUAL 2.2.7)
+    set(PACKAGE_VERSION_EXACT FALSE)
+    set(PACKAGE_VERSION_COMPATIBLE FALSE)
+    set(PACKAGE_VERSION_UNSUITABLE TRUE)
+    return()
+  else()
+    # TODO How to support this case?
+    message(FATAL_ERROR "TRIX Engine version is too low (< 2.2.7)")
+  endif()
+endif(NOT MAJOR_COMPILABLE)
+
+try_run(MINOR_VER MINOR_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.minor"
+  SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.minor.cpp"
+  CMAKE_FLAGS
+  "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+  "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+try_run(EXTRA_VER EXTRA_COMPILABLE "${CMAKE_BINARY_DIR}/TRIXEngineConfigVersion.extra"
+  SOURCES "${CMAKE_CURRENT_LIST_DIR}/TRIXEngineConfigVersion.extra.cpp"
+  CMAKE_FLAGS
+  "-DINCLUDE_DIRECTORIES=${TRIXEngine_INCLUDE_DIR}"
+  "-DLINK_LIBRARIES=${TRIXEngine_LIB}"
+)
+
+macro(assert)
+  # if(NOT ${ARGV}) makes error when ARGV starts with 'NOT'
+  if(${ARGV})
+    # Do nothing
+  else(${ARGV})
+    message(FATAL_ERROR "Internal error ${ARGV}")
+  endif(${ARGV})
+endmacro(assert)
+
+assert(MAJOR_COMPILABLE)
+assert(MINOR_COMPILABLE)
+assert(EXTRA_COMPILABLE)
+assert(NOT MAJOR_VER STREQUAL FAILED_TO_RUN)
+assert(NOT MINOR_VER STREQUAL FAILED_TO_RUN)
+assert(NOT EXTRA_VER STREQUAL FAILED_TO_RUN)
+
+set(PACKAGE_VERSION ${MAJOR_VER}.${MINOR_VER}.${EXTRA_VER})
+
+if(PACKAGE_VERSION VERSION_EQUAL PACKAGE_FIND_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+else()
+  set(PACKAGE_VERSION_EXACT FALSE)
+endif()
+
+# Assume TRIX Engine is backward compatible
+if(PACKAGE_VERSION VERSION_GREATER_EQUAL PACKAGE_FIND_VERSION)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+else()
+  set(PACKAGE_VERSION_COMPATIBLE FALSE)
+endif()
+
+set(PACKAGE_VERSION_UNSUITABLE FALSE)
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.extra.cpp
new file mode 100644 (file)
index 0000000..05fe70d
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+  uint32_t ret = 0;
+  getVersion(nullptr, nullptr, &ret);
+  return ret;
+}
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.major.cpp
new file mode 100644 (file)
index 0000000..a3de06d
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+  uint32_t ret = 0;
+  getVersion(&ret, nullptr, nullptr);
+  return ret;
+}
diff --git a/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp b/infra/nnfw/cmake/packages/TRIXEngineConfigVersion.minor.cpp
new file mode 100644 (file)
index 0000000..1193a5c
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <libnpuhost.h>
+
+int main(void)
+{
+  uint32_t ret = 0;
+  getVersion(nullptr, &ret, nullptr);
+  return ret;
+}
index bad9eb204605890320a9f2191f5a0a5c66b4bd55..1150a5fc8558b578273884104b010d0f5f8a84b8 100644 (file)
@@ -18,5 +18,5 @@ url = http://download.tizen.org/snapshots/tizen/unified/latest/repos/standard/pa
 url = http://download.tizen.org/snapshots/tizen/base/latest/repos/standard/packages/
 
 [repo.tizen_one]
-url = http://nnfw.mooo.com/archive/tizen/
+url = http://13.125.34.93/archive/tizen/
 
index 53d63713b12aabf0e8b560ad16ad0fb2c109ec5e..5d6bdd999594221a5fb7a31cb1386bb0ede488f2 100644 (file)
@@ -8,7 +8,10 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then
 fi
 
 # The default preset
-PRESET="20210910"
+PRESET="20220323"
+
+# Test is enabled by default
+DISABLE_TEST=false
 
 EXTRA_OPTIONS=()
 while [ "$#" -ne 0 ]; do
@@ -23,6 +26,10 @@ while [ "$#" -ne 0 ]; do
       PRESET="$2"
       shift 2
       ;;
+    '--notest')
+      DISABLE_TEST=true
+      shift
+      ;;
     '--')
       shift
       while [ "$#" -ne 0 ]; do
@@ -44,6 +51,10 @@ if [[ -z "${NNAS_INSTALL_PREFIX}" ]]; then
   exit 255
 fi
 
+if [[ "${DISABLE_TEST}" == "true" ]]; then
+  EXTRA_OPTIONS+=("-DENABLE_TEST=OFF")
+fi
+
 PRESET_PATH="${SCRIPT_PATH}/preset/${PRESET}"
 
 if [[ ! -f "${PRESET_PATH}" ]]; then
diff --git a/infra/packaging/preset/20220323 b/infra/packaging/preset/20220323
new file mode 100644 (file)
index 0000000..421106c
--- /dev/null
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+#      new official preset will be added when new programs are ready
+
+PRESET="20220323"
+
+function preset_configure()
+{
+  REQUIRED_UNITS=()
+  # Common Libraries
+  REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+  REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+  REQUIRED_UNITS+=("souschef")
+  REQUIRED_UNITS+=("safemain")
+  REQUIRED_UNITS+=("arser")
+  REQUIRED_UNITS+=("vconone")
+  # Hermes Logging Framework
+  REQUIRED_UNITS+=("hermes" "hermes-std")
+  # loco IR and related utilities
+  REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+  # Flatbuffer I/O
+  REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+  # Data I/O
+  REQUIRED_UNITS+=("dio-hdf5")
+  # Circle compiler library (.circle -> .circle)
+  REQUIRED_UNITS+=("luci")
+  # Tools
+  REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+  REQUIRED_UNITS+=("circle-tensordump" "circledump")
+  REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+  REQUIRED_UNITS+=("luci-eval-driver")
+  REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+  REQUIRED_UNITS+=("circle-partitioner")
+  REQUIRED_UNITS+=("one-cmds")
+  REQUIRED_UNITS+=("bcq-tools")
+
+  NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+  # TODO Use "nncc configure" and "nncc build"
+  cmake \
+    -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+    -DCMAKE_BUILD_TYPE=release \
+    -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+    -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+    ${EXTRA_OPTIONS[@]} \
+    "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+  install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+    "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+  # Install tf2nnpkg
+  install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20220323_windows b/infra/packaging/preset/20220323_windows
new file mode 100644 (file)
index 0000000..60500b1
--- /dev/null
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+function preset_configure()
+{
+  REQUIRED_UNITS=()
+  # Common Libraries
+  REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+  REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+  REQUIRED_UNITS+=("souschef")
+  REQUIRED_UNITS+=("safemain")
+  REQUIRED_UNITS+=("arser")
+  REQUIRED_UNITS+=("vconone")
+  # Hermes Logging Framework
+  REQUIRED_UNITS+=("hermes" "hermes-std")
+  # loco IR and related utilities
+  REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+  # Flatbuffer I/O
+  REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
+  # Data I/O
+  REQUIRED_UNITS+=("dio-hdf5")
+  # Circle compiler library (.circle -> .circle)
+  REQUIRED_UNITS+=("luci")
+  # Tools
+  REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+  REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+  REQUIRED_UNITS+=("luci-eval-driver")
+  REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+  REQUIRED_UNITS+=("circle-partitioner")
+  REQUIRED_UNITS+=("one-cmds")
+  REQUIRED_UNITS+=("bcq-tools")
+
+  NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+  # TODO Use "nncc configure" and "nncc build"
+  cmake \
+    -G "MSYS Makefiles" \
+    -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+    -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+    -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+    -DENABLE_TEST=OFF \
+    -DDOWNLOAD_GTEST=OFF \
+    -DBUILD_GTEST=OFF \
+    -DCMAKE_C_COMPILER=gcc \
+    -DCMAKE_CXX_COMPILER=g++ \
+    -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+    -DCMAKE_BUILD_TYPE=release \
+    -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+    -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+    ${EXTRA_OPTIONS[@]} \
+    "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+  # Install libraries to bin/ for Windows release
+  mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+  rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+  install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+    "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+  # Install tf2nnpkg
+  install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20220323" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+  # Though you have to install tensorflow to run 'tf2tfliteV2',
+  # tensorflow can't be installed in mingw. First, You can install tensorflow 
+  # from Window native CMD(run as administrator) with python virtual environment.
+  # And, you must copy it to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20220323 b/infra/packaging/res/tf2nnpkg.20220323
new file mode 100644 (file)
index 0000000..0d44818
--- /dev/null
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+  if [ "$#" -le 0 ]; then
+    return 1
+  fi
+  command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+  echo "Convert TensorFlow model to nnpackage."
+  echo "Usage: tf2nnpkg"
+  echo "    --info <path/to/info>"
+  echo "    --graphdef <path/to/pb>"
+  echo "    -o <path/to/nnpkg/directory>"
+  echo "    --v2 (optional) Use TF 2.x interface"
+  exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+  CUR="$1"
+
+  case $CUR in
+    '--help')
+      usage
+      ;;
+    '--info')
+      export INFO_FILE="$2"
+      shift 2
+      ;;
+    '--graphdef')
+      export GRAPHDEF_FILE="$2"
+      shift 2
+      ;;
+    '-o')
+      export OUTPUT_DIR="$2"
+      shift 2
+      ;;
+    '--v2')
+      TF_INTERFACE="--v2"
+      shift
+      ;;
+    *)
+      echo "${CUR}"
+      shift
+      ;;
+  esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+  echo "pb is not found. Please check --graphdef is correct."
+  exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+  echo "info is not found. Please check --info is correct."
+  exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+  echo "output directory is not specifed. Please check -o is correct.."
+  exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+  source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+  source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+  ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
index e520dd3816ee2c52b371e67de342ad8d47aace21..6a857d2c8cfac63f2985bbfb3a0586b6745f7327 100644 (file)
@@ -8,7 +8,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
 DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
 DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
 DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite;mio-tflite260"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle04;mio-tflite;mio-tflite260;mio-tflite280"
 DEBUG_BUILD_ITEMS+=";tflite2circle"
 DEBUG_BUILD_ITEMS+=";luci"
 DEBUG_BUILD_ITEMS+=";luci-interpreter"
index 9f3966af798e7870b532a62ee430ad41efc7e441..26d8de4a9bb84484798f875bae4d0bad0939a4c1 100755 (executable)
@@ -31,8 +31,9 @@ pushd $ROOT_PATH > /dev/null
 
 export DOCKER_ENV_VARS
 export DOCKER_VOLUMES
+export BUILD_OPTIONS
 # Disable nnpackage_run build: mismatch between buildtool for CI and installed hdf5
-CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF' && \
+CMD="export OPTIONS='-DBUILD_NNPACKAGE_RUN=OFF $BUILD_OPTIONS' && \
      export BUILD_TYPE=Release && \
      cp -nv Makefile.template Makefile && \
      make all install build_test_suite"
index 475da6d06a8fa1f64d071dc7df78df77b5c8d526..06cf8809a2d3fcf098cf60a09a761ffb795b5d4d 100755 (executable)
@@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
 # Circle compiler library (.circle -> .circle)
 REQUIRED_UNITS+=("luci")
 # Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-tflite280" "mio-circle04")
 # Tools
 REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
 REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
index 4dc3de874ce449ec1d77fec1a28941e8b6f074a1..6aa0db74e85729d9fcdff29cf882a7ee548fe3ad 100644 (file)
@@ -83,6 +83,7 @@ It can have the values (case-sensitive) in following table.
 |--------|------------------------|
 | tflite | tensorflow lite schema |
 | circle | nnpackage schema       |
+| tvn    | trix-engine binary     |
 
 ### Example
 
@@ -91,11 +92,11 @@ Here is an example of `MANIFEST`.
 ```
 {
     "major-version" : "1",
-    "minor-version" : "1",
+    "minor-version" : "2",
     "patch-version" : "0",
     "configs"     : [ "model.cfg" ],
-    "models"      : [ "mymodel.model", "yourmodel.model" ],
-    "model-types" : [ "tflite", "circle" ]
+    "models"      : [ "mymodel.model", "yourmodel.model", "binmodel.tvn" ],
+    "model-types" : [ "tflite", "circle", "tvn" ]
 }
 ```
 
diff --git a/packaging/CPUINFO.tar.gz b/packaging/CPUINFO.tar.gz
new file mode 100644 (file)
index 0000000..ced5deb
Binary files /dev/null and b/packaging/CPUINFO.tar.gz differ
diff --git a/packaging/GEMMLOWP.tar.gz b/packaging/GEMMLOWP.tar.gz
new file mode 100644 (file)
index 0000000..198dc14
Binary files /dev/null and b/packaging/GEMMLOWP.tar.gz differ
diff --git a/packaging/OOURAFFT.tar.gz b/packaging/OOURAFFT.tar.gz
new file mode 100644 (file)
index 0000000..85cf7fd
Binary files /dev/null and b/packaging/OOURAFFT.tar.gz differ
diff --git a/packaging/RUY.tar.gz b/packaging/RUY.tar.gz
new file mode 100644 (file)
index 0000000..9ad14fe
Binary files /dev/null and b/packaging/RUY.tar.gz differ
diff --git a/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz b/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz
new file mode 100644 (file)
index 0000000..396d12f
Binary files /dev/null and b/packaging/TENSORFLOW-2.3.0-EIGEN.tar.gz differ
diff --git a/packaging/TENSORFLOW_GPU.tar.gz b/packaging/TENSORFLOW_GPU.tar.gz
new file mode 100644 (file)
index 0000000..5133fee
Binary files /dev/null and b/packaging/TENSORFLOW_GPU.tar.gz differ
diff --git a/packaging/cpuinfo.tar.gz b/packaging/cpuinfo.tar.gz
deleted file mode 100644 (file)
index ced5deb..0000000
Binary files a/packaging/cpuinfo.tar.gz and /dev/null differ
diff --git a/packaging/eigen.tar.gz b/packaging/eigen.tar.gz
deleted file mode 100644 (file)
index 396d12f..0000000
Binary files a/packaging/eigen.tar.gz and /dev/null differ
diff --git a/packaging/gemmlowp.tar.gz b/packaging/gemmlowp.tar.gz
deleted file mode 100644 (file)
index 198dc14..0000000
Binary files a/packaging/gemmlowp.tar.gz and /dev/null differ
diff --git a/packaging/gtest.tar.gz b/packaging/gtest.tar.gz
deleted file mode 100644 (file)
index b8c3191..0000000
Binary files a/packaging/gtest.tar.gz and /dev/null differ
index 504dbf9a65dff20423750a37c0d05a5df6e5b96c..446bd22c38dadaaff3a81778bdf68495255b820d 100644 (file)
Binary files a/packaging/nnapi_test_generated.tar.gz and b/packaging/nnapi_test_generated.tar.gz differ
index 547d46a0ddf1dbed0d81054ff52f3376fbbb16ad..324fe1d11b007889d578e2187c54544427582cda 100644 (file)
@@ -1,18 +1,19 @@
 Name:    nnfw
 Summary: nnfw
-Version: 1.19.0
+Version: 1.20.0
 Release: 1
 Group:   Development
 License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
 
 Source0: %{name}-%{version}.tar.gz
 Source1: %{name}.manifest
+# TODO Update source number
 Source1001: nnapi_test_generated.tar.gz
-Source1002: gtest.tar.gz
-Source1003: eigen.tar.gz
-Source1004: gemmlowp.tar.gz
-Source1005: ruy.tar.gz
-Source1006: cpuinfo.tar.gz
+#Source1002: GTEST.tar.gz
+Source1003: TENSORFLOW-2.3.0-EIGEN.tar.gz
+Source1004: GEMMLOWP.tar.gz
+Source1005: RUY.tar.gz
+Source1006: CPUINFO.tar.gz
 Source1007: XNNPACK.tar.gz
 Source1008: FXDIV.tar.gz
 Source1009: PTHREADPOOL.tar.gz
@@ -21,11 +22,13 @@ Source1011: FP16.tar.gz
 Source1012: OPENCL_HEADERS.tar.gz
 Source1013: FARMHASH.tar.gz
 Source1014: ABSEIL.tar.gz
-Source1015: oourafft.tar.gz
+Source1015: OOURAFFT.tar.gz
+Source1016: TENSORFLOW_GPU.tar.gz
 Source2001: nnfw.pc.in
 Source2002: nnfw-plugin.pc.in
 
 %{!?build_type:     %define build_type      Release}
+%{!?trix_support:   %define trix_support    1}
 %{!?coverage_build: %define coverage_build  0}
 %{!?test_build:     %define test_build      0}
 %{!?extra_option:   %define extra_option    %{nil}}
@@ -53,6 +56,11 @@ BuildRequires:  hdf5-devel
 BuildRequires:  libaec-devel
 BuildRequires:  zlib-devel
 BuildRequires:  libjpeg-devel
+BuildRequires:  gtest-devel
+%endif
+
+%if %{trix_support} == 1
+BuildRequires:  npu-engine-devel
 %endif
 
 %description
@@ -86,9 +94,12 @@ Summary: NNFW Test
 NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
 %endif
 
-%ifarch %{arm}
+%ifarch armv7l
 %define target_arch armv7l
 %endif
+%ifarch armv7hl
+%define target_arch armv7hl
+%endif
 %ifarch x86_64
 %define target_arch x86_64
 %endif
@@ -123,7 +134,7 @@ NNFW test rpm. It does not depends on nnfw rpm since it contains nnfw runtime.
 cp %{SOURCE1} .
 mkdir ./externals
 tar -xf %{SOURCE1001} -C ./tests/nnapi/src/
-tar -xf %{SOURCE1002} -C ./externals
+#tar -xf %{SOURCE1002} -C ./externals
 tar -xf %{SOURCE1003} -C ./externals
 tar -xf %{SOURCE1004} -C ./externals
 tar -xf %{SOURCE1005} -C ./externals
@@ -137,9 +148,10 @@ tar -xf %{SOURCE1012} -C ./externals
 tar -xf %{SOURCE1013} -C ./externals
 tar -xf %{SOURCE1014} -C ./externals
 tar -xf %{SOURCE1015} -C ./externals
+tar -xf %{SOURCE1016} -C ./externals
 
 %build
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
 # runtime build
 %{build_env} ./nnfw configure %{build_options} %{extra_option}
 %{build_env} ./nnfw build -j4
@@ -157,10 +169,10 @@ pwd > tests/scripts/build_path.txt
 %endif # coverage_build
 tar -zcf test-suite.tar.gz infra/scripts
 %endif # test_build
-%endif # arm armv7l aarch64
+%endif # arm armv7l armv7hl aarch64
 
 %install
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
 
 mkdir -p %{buildroot}%{_libdir}
 mkdir -p %{buildroot}%{_bindir}
@@ -204,7 +216,7 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
 %files
 %manifest %{name}.manifest
 %defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
 %{_libdir}/*.so
 %exclude %{_includedir}/CL/*
 %endif
@@ -212,7 +224,7 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
 %files devel
 %manifest %{name}.manifest
 %defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
 %dir %{_includedir}/nnfw
 %{_includedir}/nnfw/*
 %{_libdir}/pkgconfig/nnfw.pc
@@ -221,13 +233,13 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
 %files plugin-devel
 %manifest %{name}.manifest
 %defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
 %dir %{_includedir}/onert
 %{_includedir}/onert/*
 %{_libdir}/pkgconfig/nnfw-plugin.pc
 %endif
 
-%ifarch arm armv7l aarch64 x86_64 %ix86
+%ifarch arm armv7l armv7hl aarch64 x86_64 %ix86
 %files minimal-app
 %manifest %{name}.manifest
 %defattr(-,root,root,-)
@@ -238,10 +250,10 @@ install -m 0644 ./tests/scripts/build_path.txt %{buildroot}%{test_install_dir}/t
 %files test
 %manifest %{name}.manifest
 %defattr(-,root,root,-)
-%ifarch arm armv7l aarch64 x86_64
+%ifarch arm armv7l armv7hl aarch64 x86_64
 %dir %{test_install_home}
 %{test_install_home}/*
-%endif # arm armv7l aarch64
+%endif # arm armv7l armv7hl aarch64
 %endif # test_build
 
 %changelog
diff --git a/packaging/oourafft.tar.gz b/packaging/oourafft.tar.gz
deleted file mode 100644 (file)
index 85cf7fd..0000000
Binary files a/packaging/oourafft.tar.gz and /dev/null differ
diff --git a/packaging/ruy.tar.gz b/packaging/ruy.tar.gz
deleted file mode 100644 (file)
index 9ad14fe..0000000
Binary files a/packaging/ruy.tar.gz and /dev/null differ
diff --git a/res/CircleSchema/0.3/circle_schema.fbs b/res/CircleSchema/0.3/circle_schema.fbs
new file mode 100644 (file)
index 0000000..3972056
--- /dev/null
@@ -0,0 +1,1137 @@
+// Copyright (c) 2019~2020 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// Major version is schema version.
+// We keep schema version if it is compatible
+// Minor version is for human communication
+// It will not be stored in circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+//              `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+//              `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+  FLOAT32 = 0,
+  FLOAT16 = 1,
+  INT32 = 2,
+  UINT8 = 3,
+  INT64 = 4,
+  STRING = 5,
+  BOOL = 6,
+  INT16 = 7,
+  COMPLEX64 = 8,
+  INT8 = 9,
+  FLOAT64 = 10,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+  custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+  CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+  // These four parameters are the asymmetric linear quantization parameters.
+  // Given a quantized value q, the corresponding float value f should be:
+  //   f = scale * (q - zero_point)
+  // For other quantization types, the QuantizationDetails below is used.
+  min:[float];  // For importing back into tensorflow.
+  max:[float];  // For importing back into tensorflow.
+  scale:[float];  // For dequantizing the tensor's values.
+  zero_point:[long];
+
+  // If this is not none, the other quantization parameters (i.e. min, max,
+  // scale, zero_point fields above) are ignored and the value of the
+  // QuantizationDetails union should be used.
+  details:QuantizationDetails;
+
+  // Specifies the dimension of the Tensor's shape that the scales and
+  // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+  // with quantization params:
+  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+  // will be quantized across the second dimension of t.
+  //   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+  quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+//   1. In what order to traverse these dimensions. For example, to store a 2-D
+//      matrix in row major order, the traversal order would be (d0, d1),
+//      whereas to store it in column major order, the traversal order would be
+//      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+//      could be (d0, d1, d2, d3).
+//   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+//      tensor dimension in (d0, ..., dn-1).
+//   3. In the traversal order defined above, the format (dense vs. sparse) and
+//      index metadata for each dimension. For a dense dimension, this is just
+//      the size of that dimension. For a sparse dimension, it's the same as
+//      the compressed index defined in the Compressed Sparse Row (CSR) format.
+//      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+//   1. DENSE: each coordinate in this dimension is stored implicitly.
+//   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//      compression technique is the same what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+  DENSE = 0,
+  SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+  values:[int];
+}
+
+table Uint16Vector {
+  values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+  values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is a int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+  Int32Vector,
+  Uint16Vector,
+  Uint8Vector
+}
+
+table DimensionMetadata {
+  // Whether a dimension is dense or sparse.
+  format:DimensionType;
+  // Index metadata used for a dimension.
+  //   - If format is DimensionType.DENSE then we use the dense_size field to
+  //     store the size of that dimension. Each index in that dimension is
+  //     stored implicitly.
+  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
+  //     array_indices to encode that dimension. array_segments represents how
+  //     to segment the indices array, each segment corresponds to one element
+  //     in the previous dimension. array_indices represents the index of the
+  //     non-zero elements within this dimension (as those in the CSR matrix
+  //     format, where the first array is row pointers and the second array is
+  //     column indices).
+  dense_size:int;
+  array_segments:SparseIndexVector;
+  array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+  // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+  // ..., dn-1),
+  //   - if not block sparse, the traversal_order is just a permutation of (d0,
+  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
+  //     have traversal_order = (d0, d1).
+  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
+  //     traversal_order has n + k elements. The first n elements are still a
+  //     permutation of (d0, ..., dn-1). The lask k elements are a permutation
+  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
+  //     would have traversal_order = (d0, d1, d2, d3).
+  traversal_order:[int];
+  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn).
+  // It's stored in the order of (dn, ..., dn+k-1).
+  // If not block-sparse, this field is NULL.
+  block_map:[int];
+  // In the traversal order defined above, the metadata needed for
+  // each dimension to locate the non-zero values in the original dense tensor.
+  // The size of the dim_metadata array = the size of the traversal_order array
+  // = n + k.
+  dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+  // The tensor shape. The meaning of each entry is operator-specific but
+  // builtin ops use: [batch size, height, width, number of channels] (That's
+  // Tensorflow's NHWC).
+  shape:[int];
+  type:TensorType;
+  // An index that refers to the buffers table at the root of the model. Or,
+  // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always existent empty buffer).
+  //
+  // The data_buffer itself is an opaque container, with the assumption that the
+  // target device is little-endian. In addition, all builtin operators assume
+  // the memory is ordered such that if `shape` is [4, 3, 2], then index
+  // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+  buffer:uint;
+  name:string;  // For debugging and importing back into tensorflow.
+  quantization:QuantizationParameters;  // Optional.
+
+  is_variable:bool = false;
+
+  // Parameters to encode a sparse tensor. See the example in
+  // tensorflow/lite/testdata/sparse_tensor.json.
+  sparsity:SparsityParameters;  // Optional.
+
+  // Encodes `shape` with unknown dimensions. Unknown dimensions are
+  // represented with -1.
+  shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+
+enum BuiltinOperator : ubyte {
+  ADD = 0,
+  AVERAGE_POOL_2D = 1,
+  CONCATENATION = 2,
+  CONV_2D = 3,
+  DEPTHWISE_CONV_2D = 4,
+  DEPTH_TO_SPACE = 5,
+  DEQUANTIZE = 6,
+  EMBEDDING_LOOKUP = 7,
+  FLOOR = 8,
+  FULLY_CONNECTED = 9,
+  HASHTABLE_LOOKUP = 10,
+  L2_NORMALIZATION = 11,
+  L2_POOL_2D = 12,
+  LOCAL_RESPONSE_NORMALIZATION = 13,
+  LOGISTIC = 14,
+  LSH_PROJECTION = 15,
+  LSTM = 16,
+  MAX_POOL_2D = 17,
+  MUL = 18,
+  RELU = 19,
+  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+  // since different model developers use RELU1 in different ways. Never
+  // create another op called RELU1.
+  RELU_N1_TO_1 = 20,
+  RELU6 = 21,
+  RESHAPE = 22,
+  RESIZE_BILINEAR = 23,
+  RNN = 24,
+  SOFTMAX = 25,
+  SPACE_TO_DEPTH = 26,
+  SVDF = 27,
+  TANH = 28,
+  // TODO(aselle): Consider rename to CONCATENATE_EMBEDDINGS
+  CONCAT_EMBEDDINGS = 29,
+  SKIP_GRAM = 30,
+  CALL = 31,
+  CUSTOM = 32,
+  EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
+  SPACE_TO_BATCH_ND = 38,
+  TRANSPOSE = 39,
+  MEAN = 40,
+  SUB = 41,
+  DIV = 42,
+  SQUEEZE = 43,
+  UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+  STRIDED_SLICE = 45,
+  BIDIRECTIONAL_SEQUENCE_RNN = 46,
+  EXP = 47,
+  TOPK_V2 = 48,
+  SPLIT = 49,
+  LOG_SOFTMAX = 50,
+  // DELEGATE is a special op type for the operations which are delegated to
+  // other backends.
+  // WARNING: Experimental interface, subject to change
+  DELEGATE = 51,
+  BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+  CAST = 53,
+  PRELU = 54,
+  MAXIMUM = 55,
+  ARG_MAX = 56,
+  MINIMUM = 57,
+  LESS = 58,
+  NEG = 59,
+  PADV2 = 60,
+  GREATER = 61,
+  GREATER_EQUAL = 62,
+  LESS_EQUAL = 63,
+  SELECT = 64,
+  SLICE = 65,
+  SIN = 66,
+  TRANSPOSE_CONV = 67,
+  SPARSE_TO_DENSE = 68,
+  TILE = 69,
+  EXPAND_DIMS = 70,
+  EQUAL = 71,
+  NOT_EQUAL = 72,
+  LOG = 73,
+  SUM = 74,
+  SQRT = 75,
+  RSQRT = 76,
+  SHAPE = 77,
+  POW = 78,
+  ARG_MIN = 79,
+  FAKE_QUANT = 80,
+  REDUCE_PROD = 81,
+  REDUCE_MAX = 82,
+  PACK = 83,
+  LOGICAL_OR = 84,
+  ONE_HOT = 85,
+  LOGICAL_AND = 86,
+  LOGICAL_NOT = 87,
+  UNPACK = 88,
+  REDUCE_MIN = 89,
+  FLOOR_DIV = 90,
+  REDUCE_ANY = 91,
+  SQUARE = 92,
+  ZEROS_LIKE = 93,
+  FILL = 94,
+  FLOOR_MOD = 95,
+  RANGE = 96,
+  RESIZE_NEAREST_NEIGHBOR = 97,
+  LEAKY_RELU = 98,
+  SQUARED_DIFFERENCE = 99,
+  MIRROR_PAD = 100,
+  ABS = 101,
+  SPLIT_V = 102,
+  UNIQUE = 103,
+  CEIL = 104,
+  REVERSE_V2 = 105,
+  ADD_N = 106,
+  GATHER_ND = 107,
+  COS = 108,
+  WHERE = 109,
+  RANK = 110,
+  ELU = 111,
+  REVERSE_SEQUENCE = 112,
+  MATRIX_DIAG = 113,
+  QUANTIZE = 114,
+  MATRIX_SET_DIAG = 115,
+  ROUND = 116,
+  HARD_SWISH = 117,
+  IF = 118,
+  WHILE = 119,
+  NON_MAX_SUPPRESSION_V4 = 120,
+  NON_MAX_SUPPRESSION_V5 = 121,
+  SCATTER_ND = 122,
+  SELECT_V2 = 123,
+  DENSIFY = 124,
+  SEGMENT_SUM = 125,
+  BATCH_MATMUL = 126,
+  BCQ_GATHER = 252,
+  BCQ_FULLY_CONNECTED = 253,
+  INSTANCE_NORM = 254,
+}
+
+// Options for the builtin operators.
+union BuiltinOptions {
+  Conv2DOptions,
+  DepthwiseConv2DOptions,
+  ConcatEmbeddingsOptions,
+  LSHProjectionOptions,
+  Pool2DOptions,
+  SVDFOptions,
+  RNNOptions,
+  FullyConnectedOptions,
+  SoftmaxOptions,
+  ConcatenationOptions,
+  AddOptions,
+  L2NormOptions,
+  LocalResponseNormalizationOptions,
+  LSTMOptions,
+  ResizeBilinearOptions,
+  CallOptions,
+  ReshapeOptions,
+  SkipGramOptions,
+  SpaceToDepthOptions,
+  EmbeddingLookupSparseOptions,
+  MulOptions,
+  PadOptions,
+  GatherOptions,
+  BatchToSpaceNDOptions,
+  SpaceToBatchNDOptions,
+  TransposeOptions,
+  ReducerOptions,
+  SubOptions,
+  DivOptions,
+  SqueezeOptions,
+  SequenceRNNOptions,
+  StridedSliceOptions,
+  ExpOptions,
+  TopKV2Options,
+  SplitOptions,
+  LogSoftmaxOptions,
+  CastOptions,
+  DequantizeOptions,
+  MaximumMinimumOptions,
+  ArgMaxOptions,
+  LessOptions,
+  NegOptions,
+  PadV2Options,
+  GreaterOptions,
+  GreaterEqualOptions,
+  LessEqualOptions,
+  SelectOptions,
+  SliceOptions,
+  TransposeConvOptions,
+  SparseToDenseOptions,
+  TileOptions,
+  ExpandDimsOptions,
+  EqualOptions,
+  NotEqualOptions,
+  ShapeOptions,
+  PowOptions,
+  ArgMinOptions,
+  FakeQuantOptions,
+  PackOptions,
+  LogicalOrOptions,
+  OneHotOptions,
+  LogicalAndOptions,
+  LogicalNotOptions,
+  UnpackOptions,
+  FloorDivOptions,
+  SquareOptions,
+  ZerosLikeOptions,
+  FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
+  RangeOptions,
+  ResizeNearestNeighborOptions,
+  LeakyReluOptions,
+  SquaredDifferenceOptions,
+  MirrorPadOptions,
+  AbsOptions,
+  SplitVOptions,
+  UniqueOptions,
+  ReverseV2Options,
+  AddNOptions,
+  GatherNdOptions,
+  CosOptions,
+  WhereOptions,
+  RankOptions,
+  ReverseSequenceOptions,
+  MatrixDiagOptions,
+  QuantizeOptions,
+  MatrixSetDiagOptions,
+  HardSwishOptions,
+  IfOptions,
+  WhileOptions,
+  DepthToSpaceOptions,
+  NonMaxSuppressionV4Options,
+  NonMaxSuppressionV5Options,
+  ScatterNdOptions,
+  SelectV2Options,
+  DensifyOptions,
+  SegmentSumOptions,
+  BatchMatMulOptions,
+  BCQGatherOptions = 252,
+  BCQFullyConnectedOptions = 253,
+  InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+  NONE = 0,
+  RELU = 1,
+  RELU_N1_TO_1 = 2,
+  RELU6 = 3,
+  TANH = 4,
+  SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  filter_width:int;
+  filter_height:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  // See comments in lite/c/builtin_op_data.h for more details.
+  depth_multiplier:int;
+  fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+  num_channels:int;
+  num_columns_per_channel:[int];
+  embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+  UNKNOWN = 0,
+  SPARSE = 1,
+  DENSE = 2,
+}
+
+table LSHProjectionOptions {
+  type: LSHProjectionType;
+}
+
+table SVDFOptions {
+  rank:int;
+  fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for non
+  // constant inputs at evaluation time.
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
+  asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+  DEFAULT = 0,
+  SHUFFLED4x16INT8 = 1,
+  SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+  // Parameters for FullyConnected version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+
+  // Parameters for FullyConnected version 2 or above.
+  weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimension is preserved. Furthermore,
+  // all but the last dimension of the input and output shapes will be equal.
+  keep_num_dims: bool;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+  beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+  axis:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table MulOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+  radius:int;
+  bias:float;
+  alpha:float;
+  beta:float;
+}
+
+enum LSTMKernelType : byte {
+  // Full LSTM kernel which supports peephole and projection.
+  FULL = 0,
+  // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+  BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+  // Parameters for LSTM version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // Parameters for LSTM version 2 or above.
+  // Basic kernel is only supported in version 2 or above.
+  kernel_type: LSTMKernelType = FULL;
+
+  // Parameters for LSTM version 4 or above.
+  asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true then first dimension is sequence, otherwise batch.
+  time_major:bool;
+
+  // Parameter for Unidirectional Sequence LSTM version 4.
+  asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+  // Parameters supported by version 1:
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+
+  // Parameters supported by version 2:
+  // If true then first dimension is sequence, otherwise batch.
+  // Version 1 implementations assumed time_major to be true, so this default
+  // value should never change.
+  time_major: bool = true;
+
+  // Parameters for version 3 or above.
+  asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+  new_height: int (deprecated);
+  new_width: int (deprecated);
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+  align_corners: bool;
+}
+
+// A call operation options
+table CallOptions {
+  // The subgraph index that needs to be called.
+  subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+  new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+  ngram_size: int;
+  max_skip_size: int;
+  include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+  block_size: int;
+}
+
+table DepthToSpaceOptions {
+  block_size: int;
+}
+
+table SubOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DivOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+  SUM = 0,
+  MEAN = 1,
+  SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+  combiner:CombinerType;
+}
+
+table GatherOptions {
+  axis: int;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+  keep_dims: bool;
+}
+
+table SqueezeOptions {
+  squeeze_dims:[int];
+}
+
+table SplitOptions {
+  num_splits: int;
+}
+
+table SplitVOptions {
+  num_splits: int;
+}
+
+table StridedSliceOptions {
+  begin_mask: int;
+  end_mask: int;
+  ellipsis_mask: int;
+  new_axis_mask: int;
+  shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+  in_data_type: TensorType;
+  out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+  output_type : TensorType;
+}
+
+table ArgMinOptions {
+  output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+  validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+  // Optional output type of the operation (int32 or int64). Defaults to int32.
+  out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+  // Parameters supported by version 1:
+  min:float;
+  max:float;
+  num_bits:int;
+
+  // Parameters supported by version 2:
+  narrow_range:bool;
+}
+
+table PackOptions {
+  values_count:int;
+  axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+  axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+  num:int;
+  axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+  alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+  // Doesn't include borders.
+  REFLECT = 0,
+  // Includes borders.
+  SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+  mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+  idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+  seq_dim:int;
+  batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+  then_subgraph_index:int;
+  else_subgraph_index:int;
+}
+
+table WhileOptions {
+  cond_subgraph_index:int;
+  body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+  adjoint_lhs:bool;
+  adjoint_rhs:bool;
+}
+
+table BCQGatherOptions {
+  input_hidden_size: int;
+  axis: int;
+}
+
+table BCQFullyConnectedOptions {
+  weights_hidden_size: int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+  epsilon:float;
+  fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  builtin_code:BuiltinOperator;
+  custom_code:string;
+
+  // The version of the operator. The version need to be bumped whenever new
+  // parameters are introduced into an op.
+  version:int = 1;
+}
+
+enum CustomOptionsFormat : byte {
+  FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+  // For 2D data, NHWC(batch, height, width, channels)
+  // For 3D data, NDHWC(batch, depth, height, width, channels)
+  CHANNELS_LAST = 0,
+  // For 2D data, NCHW(batch, channels, height, width)
+  // For 3D data, NCDHW(batch, channels, depth, height, width)
+  CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+  // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+  opcode_index:uint;
+
+  // Optional input are indicated by -1.
+  inputs:[int];
+  outputs:[int];
+
+  builtin_options:BuiltinOptions;
+  custom_options:[ubyte];
+  custom_options_format:CustomOptionsFormat;
+
+  // A list of booleans indicating the input tensors which are being mutated by
+  // this operator.(e.g. used by RNN and LSTM).
+  // For example, if the "inputs" array refers to 5 tensors and the second and
+  // fifth are mutable variables, then this list will contain
+  // [false, true, false, false, true].
+  //
+  // If the list is empty, no variable is mutated in this operator.
+  // The list either has the same length as `inputs`, or is empty.
+  mutating_variable_inputs:[bool];
+
+  // A list of indices to the subgraph's "tensors" that are internal to an Op.
+  // Internal tensors are those that do not flow in or out of the operation,
+  // but instead are part of internal computation. As such, the operation's
+  // implementation may manage its memory more efficiently. They are needed
+  // however (i.e. not just an implementation detail) since they are part of the
+  // computation, which may require relevant metadata such as quantization
+  // parameters.
+  intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+  // A list of all tensors used in this subgraph.
+  tensors:[Tensor];
+
+  // Indices of the tensors that are inputs into this subgraph. Note this is
+  // the list of non-static tensors that feed into the subgraph for inference.
+  inputs:[int];
+
+  // Indices of the tensors that are outputs out of this subgraph. Note this is
+  // the list of output tensors that are considered the product of the
+  // subgraph's inference.
+  outputs:[int];
+
+  // All operators, in execution order.
+  operators:[Operator];
+
+  // Name of this subgraph (used for debugging).
+  name:string;
+
+  // Data format for input/output of SubGraph
+  data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+  data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+  // A human readable string to uniquely identify a Metadata.
+  name:string;
+  // An index to the buffers table.
+  buffer:uint;
+}
+
+table Model {
+  // Version of the schema.
+  version:uint;
+
+  // A list of all operator codes used in this model. This is
+  // kept in order because operators carry an index into this
+  // vector.
+  operator_codes:[OperatorCode];
+
+  // All the subgraphs of the model. The 0th is assumed to be the main
+  // model.
+  subgraphs:[SubGraph];
+
+  // A description of the model.
+  description:string;
+
+  // Buffers of the model.
+  // Note the 0th entry of this array must be an empty buffer (sentinel).
+  // This is a convention so that tensors without a buffer can provide 0 as
+  // their buffer.
+  buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+  // Deprecated, prefer to use metadata field.
+  metadata_buffer:[int];
+
+  // Metadata about the model.
+  metadata:[Metadata];
+}
+
+root_type Model;
diff --git a/res/CircleSchema/0.4/circle_schema.fbs b/res/CircleSchema/0.4/circle_schema.fbs
new file mode 100644 (file)
index 0000000..8ad444d
--- /dev/null
@@ -0,0 +1,1292 @@
+// Copyright (c) 2019~2022 Samsung Electronics Co., Ltd. All Rights Reserved
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+//
+// Version Major.Minor
+//
+// Major version is schema version.
+// We keep schema version if it is compatible
+// Minor version is for human communication
+// It will not be stored in circle model.
+//
+// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema.
+// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`)
+//              `BATCH_MATMUL` operator, `FLOAT64` tensor type,
+//              `asymmetric_quantize_inputs` for several operator options
+// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added.
+// Version 0.3: SHUFFLED16x1FLOAT32 is added.
+// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema.
+
+namespace circle;
+
+// This corresponds to the version.
+file_identifier "CIR0";
+// File extension of any written files.
+file_extension "circle";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+  FLOAT32 = 0,
+  FLOAT16 = 1,
+  INT32 = 2,
+  UINT8 = 3,
+  INT64 = 4,
+  STRING = 5,
+  BOOL = 6,
+  INT16 = 7,
+  COMPLEX64 = 8,
+  INT8 = 9,
+  FLOAT64 = 10,
+  COMPLEX128 = 11,
+  UINT64 = 12,
+  // Experimental: Resource and variant types are experimental, that are subject
+  // to change. Do not implement custom kernels using resource & variant types
+  // now.
+  RESOURCE = 13,
+  VARIANT = 14,
+  UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+  custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+  CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+  // These four parameters are the asymmetric linear quantization parameters.
+  // Given a quantized value q, the corresponding float value f should be:
+  //   f = scale * (q - zero_point)
+  // For other quantization types, the QuantizationDetails below is used.
+  min:[float];  // For importing back into tensorflow.
+  max:[float];  // For importing back into tensorflow.
+  scale:[float];  // For dequantizing the tensor's values.
+  zero_point:[long];
+
+  // If this is not none, the other quantization parameters (i.e. min, max,
+  // scale, zero_point fields above) are ignored and the value of the
+  // QuantizationDetails union should be used.
+  details:QuantizationDetails;
+
+  // Specifies the dimension of the Tensor's shape that the scales and
+  // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+  // with quantization params:
+  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+  // will be quantized across the second dimension of t.
+  //   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+  quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+//   1. In what order to traverse these dimensions. For example, to store a 2-D
+//      matrix in row major order, the traversal order would be (d0, d1),
+//      whereas to store it in column major order, the traversal order would be
+//      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+//      could be (d0, d1, d2, d3).
+//   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+//      tensor dimension in (d0, ..., dn-1).
+//   3. In the traversal order defined above, the format (dense vs. sparse) and
+//      index metadata for each dimension. For a dense dimension, this is just
+//      the size of that dimension. For a sparse dimension, it's the same as
+//      the compressed index defined in the Compressed Sparse Row (CSR) format.
+//      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+//   1. DENSE: each coordinate in this dimension is stored implicitly.
+//   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//      compression technique is the same what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+  DENSE = 0,
+  SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+  values:[int];
+}
+
+table Uint16Vector {
+  values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+  values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is a int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+  Int32Vector,
+  Uint16Vector,
+  Uint8Vector
+}
+
+table DimensionMetadata {
+  // Whether a dimension is dense or sparse.
+  format:DimensionType;
+  // Index metadata used for a dimension.
+  //   - If format is DimensionType.DENSE then we use the dense_size field to
+  //     store the size of that dimension. Each index in that dimension is
+  //     stored implicitly.
+  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
+  //     array_indices to encode that dimension. array_segments represents how
+  //     to segment the indices array, each segment corresponds to one element
+  //     in the previous dimension. array_indices represents the index of the
+  //     non-zero elements within this dimension (as those in the CSR matrix
+  //     format, where the first array is row pointers and the second array is
+  //     column indices).
+  dense_size:int;
+  array_segments:SparseIndexVector;
+  array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+  // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+  // ..., dn-1),
+  //   - if not block sparse, the traversal_order is just a permutation of (d0,
+  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
+  //     have traversal_order = (d0, d1).
+  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
+  //     traversal_order has n + k elements. The first n elements are still a
+  //     permutation of (d0, ..., dn-1). The last k elements are a permutation
+  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
+  //     would have traversal_order = (d0, d1, d2, d3).
+  traversal_order:[int];
+  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn).
+  // It's stored in the order of (dn, ..., dn+k-1).
+  // If not block-sparse, this field is NULL.
+  block_map:[int];
+  // In the traversal order defined above, the metadata needed for
+  // each dimension to locate the non-zero values in the original dense tensor.
+  // The size of the dim_metadata array = the size of the traversal_order array
+  // = n + k.
+  dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+  // The tensor shape. The meaning of each entry is operator-specific but
+  // builtin ops use: [batch size, height, width, number of channels] (That's
+  // Tensorflow's NHWC).
+  shape:[int];
+  type:TensorType;
+  // An index that refers to the buffers table at the root of the model. Or,
+  // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always existent empty buffer).
+  //
+  // The data_buffer itself is an opaque container, with the assumption that the
+  // target device is little-endian. In addition, all builtin operators assume
+  // the memory is ordered such that if `shape` is [4, 3, 2], then index
+  // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+  buffer:uint;
+  name:string;  // For debugging and importing back into tensorflow.
+  quantization:QuantizationParameters;  // Optional.
+
+  is_variable:bool = false;
+
+  // Parameters to encode a sparse tensor. See the example in
+  // tensorflow/lite/testdata/sparse_tensor.json.
+  sparsity:SparsityParameters;  // Optional.
+
+  // Encodes `shape` with unknown dimensions. Unknown dimensions are
+  // represented with -1.
+  shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+  BCQ_GATHER = -4,
+  BCQ_FULLY_CONNECTED = -3,
+  INSTANCE_NORM = -2,
+  ADD = 0,
+  AVERAGE_POOL_2D = 1,
+  CONCATENATION = 2,
+  CONV_2D = 3,
+  DEPTHWISE_CONV_2D = 4,
+  DEPTH_TO_SPACE = 5,
+  DEQUANTIZE = 6,
+  EMBEDDING_LOOKUP = 7,
+  FLOOR = 8,
+  FULLY_CONNECTED = 9,
+  HASHTABLE_LOOKUP = 10,
+  L2_NORMALIZATION = 11,
+  L2_POOL_2D = 12,
+  LOCAL_RESPONSE_NORMALIZATION = 13,
+  LOGISTIC = 14,
+  LSH_PROJECTION = 15,
+  LSTM = 16,
+  MAX_POOL_2D = 17,
+  MUL = 18,
+  RELU = 19,
+  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+  // since different model developers use RELU1 in different ways. Never
+  // create another op called RELU1.
+  RELU_N1_TO_1 = 20,
+  RELU6 = 21,
+  RESHAPE = 22,
+  RESIZE_BILINEAR = 23,
+  RNN = 24,
+  SOFTMAX = 25,
+  SPACE_TO_DEPTH = 26,
+  SVDF = 27,
+  TANH = 28,
+  CONCAT_EMBEDDINGS = 29,
+  SKIP_GRAM = 30,
+  CALL = 31,
+  CUSTOM = 32,
+  EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
+  SPACE_TO_BATCH_ND = 38,
+  TRANSPOSE = 39,
+  MEAN = 40,
+  SUB = 41,
+  DIV = 42,
+  SQUEEZE = 43,
+  UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+  STRIDED_SLICE = 45,
+  BIDIRECTIONAL_SEQUENCE_RNN = 46,
+  EXP = 47,
+  TOPK_V2 = 48,
+  SPLIT = 49,
+  LOG_SOFTMAX = 50,
+  // DELEGATE is a special op type for the operations which are delegated to
+  // other backends.
+  // WARNING: Experimental interface, subject to change
+  DELEGATE = 51,
+  BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+  CAST = 53,
+  PRELU = 54,
+  MAXIMUM = 55,
+  ARG_MAX = 56,
+  MINIMUM = 57,
+  LESS = 58,
+  NEG = 59,
+  PADV2 = 60,
+  GREATER = 61,
+  GREATER_EQUAL = 62,
+  LESS_EQUAL = 63,
+  SELECT = 64,
+  SLICE = 65,
+  SIN = 66,
+  TRANSPOSE_CONV = 67,
+  SPARSE_TO_DENSE = 68,
+  TILE = 69,
+  EXPAND_DIMS = 70,
+  EQUAL = 71,
+  NOT_EQUAL = 72,
+  LOG = 73,
+  SUM = 74,
+  SQRT = 75,
+  RSQRT = 76,
+  SHAPE = 77,
+  POW = 78,
+  ARG_MIN = 79,
+  FAKE_QUANT = 80,
+  REDUCE_PROD = 81,
+  REDUCE_MAX = 82,
+  PACK = 83,
+  LOGICAL_OR = 84,
+  ONE_HOT = 85,
+  LOGICAL_AND = 86,
+  LOGICAL_NOT = 87,
+  UNPACK = 88,
+  REDUCE_MIN = 89,
+  FLOOR_DIV = 90,
+  REDUCE_ANY = 91,
+  SQUARE = 92,
+  ZEROS_LIKE = 93,
+  FILL = 94,
+  FLOOR_MOD = 95,
+  RANGE = 96,
+  RESIZE_NEAREST_NEIGHBOR = 97,
+  LEAKY_RELU = 98,
+  SQUARED_DIFFERENCE = 99,
+  MIRROR_PAD = 100,
+  ABS = 101,
+  SPLIT_V = 102,
+  UNIQUE = 103,
+  CEIL = 104,
+  REVERSE_V2 = 105,
+  ADD_N = 106,
+  GATHER_ND = 107,
+  COS = 108,
+  WHERE = 109,
+  RANK = 110,
+  ELU = 111,
+  REVERSE_SEQUENCE = 112,
+  MATRIX_DIAG = 113,
+  QUANTIZE = 114,
+  MATRIX_SET_DIAG = 115,
+  ROUND = 116,
+  HARD_SWISH = 117,
+  IF = 118,
+  WHILE = 119,
+  NON_MAX_SUPPRESSION_V4 = 120,
+  NON_MAX_SUPPRESSION_V5 = 121,
+  SCATTER_ND = 122,
+  SELECT_V2 = 123,
+  DENSIFY = 124,
+  SEGMENT_SUM = 125,
+  BATCH_MATMUL = 126,
+  PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+  CUMSUM = 128,
+  CALL_ONCE = 129,
+  BROADCAST_TO = 130,
+  RFFT2D = 131,
+  CONV_3D = 132,
+  IMAG=133,
+  REAL=134,
+  COMPLEX_ABS=135,
+  HASHTABLE = 136,
+  HASHTABLE_FIND = 137,
+  HASHTABLE_IMPORT = 138,
+  HASHTABLE_SIZE = 139,
+  REDUCE_ALL = 140,
+  CONV_3D_TRANSPOSE = 141,
+  VAR_HANDLE = 142,
+  READ_VARIABLE = 143,
+  ASSIGN_VARIABLE = 144,
+  BROADCAST_ARGS = 145,
+  RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+  Conv2DOptions,
+  DepthwiseConv2DOptions,
+  ConcatEmbeddingsOptions,
+  LSHProjectionOptions,
+  Pool2DOptions,
+  SVDFOptions,
+  RNNOptions,
+  FullyConnectedOptions,
+  SoftmaxOptions,
+  ConcatenationOptions,
+  AddOptions,
+  L2NormOptions,
+  LocalResponseNormalizationOptions,
+  LSTMOptions,
+  ResizeBilinearOptions,
+  CallOptions,
+  ReshapeOptions,
+  SkipGramOptions,
+  SpaceToDepthOptions,
+  EmbeddingLookupSparseOptions,
+  MulOptions,
+  PadOptions,
+  GatherOptions,
+  BatchToSpaceNDOptions,
+  SpaceToBatchNDOptions,
+  TransposeOptions,
+  ReducerOptions,
+  SubOptions,
+  DivOptions,
+  SqueezeOptions,
+  SequenceRNNOptions,
+  StridedSliceOptions,
+  ExpOptions,
+  TopKV2Options,
+  SplitOptions,
+  LogSoftmaxOptions,
+  CastOptions,
+  DequantizeOptions,
+  MaximumMinimumOptions,
+  ArgMaxOptions,
+  LessOptions,
+  NegOptions,
+  PadV2Options,
+  GreaterOptions,
+  GreaterEqualOptions,
+  LessEqualOptions,
+  SelectOptions,
+  SliceOptions,
+  TransposeConvOptions,
+  SparseToDenseOptions,
+  TileOptions,
+  ExpandDimsOptions,
+  EqualOptions,
+  NotEqualOptions,
+  ShapeOptions,
+  PowOptions,
+  ArgMinOptions,
+  FakeQuantOptions,
+  PackOptions,
+  LogicalOrOptions,
+  OneHotOptions,
+  LogicalAndOptions,
+  LogicalNotOptions,
+  UnpackOptions,
+  FloorDivOptions,
+  SquareOptions,
+  ZerosLikeOptions,
+  FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
+  RangeOptions,
+  ResizeNearestNeighborOptions,
+  LeakyReluOptions,
+  SquaredDifferenceOptions,
+  MirrorPadOptions,
+  AbsOptions,
+  SplitVOptions,
+  UniqueOptions,
+  ReverseV2Options,
+  AddNOptions,
+  GatherNdOptions,
+  CosOptions,
+  WhereOptions,
+  RankOptions,
+  ReverseSequenceOptions,
+  MatrixDiagOptions,
+  QuantizeOptions,
+  MatrixSetDiagOptions,
+  HardSwishOptions,
+  IfOptions,
+  WhileOptions,
+  DepthToSpaceOptions,
+  NonMaxSuppressionV4Options,
+  NonMaxSuppressionV5Options,
+  ScatterNdOptions,
+  SelectV2Options,
+  DensifyOptions,
+  SegmentSumOptions,
+  BatchMatMulOptions,
+  CumsumOptions,
+  CallOnceOptions,
+  BroadcastToOptions,
+  Rfft2dOptions,
+  Conv3DOptions,
+  HashtableOptions,
+  HashtableFindOptions,
+  HashtableImportOptions,
+  HashtableSizeOptions,
+  VarHandleOptions,
+  ReadVariableOptions,
+  AssignVariableOptions,
+  RandomOptions,
+  BCQGatherOptions = 252,
+  BCQFullyConnectedOptions = 253,
+  InstanceNormOptions = 254,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+  NONE = 0,
+  RELU = 1,
+  RELU_N1_TO_1 = 2,
+  RELU6 = 3,
+  TANH = 4,
+  SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+  padding:Padding;
+  stride_d:int;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_d_factor:int = 1;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  filter_width:int;
+  filter_height:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  // See comments in lite/c/builtin_op_data.h for more details.
+  depth_multiplier:int;
+  fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+  num_channels:int;
+  num_columns_per_channel:[int];
+  embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+  UNKNOWN = 0,
+  SPARSE = 1,
+  DENSE = 2,
+}
+
+table LSHProjectionOptions {
+  type: LSHProjectionType;
+}
+
+table SVDFOptions {
+  rank:int;
+  fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for non
+  // constant inputs at evaluation time.
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
+  asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+  DEFAULT = 0,
+  SHUFFLED4x16INT8 = 1,
+  SHUFFLED16x1FLOAT32 = 127
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+  // Parameters for FullyConnected version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+
+  // Parameters for FullyConnected version 2 or above.
+  weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimension is preserved. Furthermore,
+  // all but the last dimension of the input and output shapes will be equal.
+  keep_num_dims: bool;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+  beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+  axis:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 3.
+  pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+  // This field is currently ignored in the L2 Norm Op.
+  fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+  radius:int;
+  bias:float;
+  alpha:float;
+  beta:float;
+}
+
+enum LSTMKernelType : byte {
+  // Full LSTM kernel which supports peephole and projection.
+  FULL = 0,
+  // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+  BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+  // Parameters for LSTM version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // Parameters for LSTM version 2 or above.
+  // Basic kernel is only supported in version 2 or above.
+  kernel_type: LSTMKernelType = FULL;
+
+  // Parameters for LSTM version 4 or above.
+  asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true then first dimension is sequence, otherwise batch.
+  time_major:bool;
+
+  // Parameter for Unidirectional Sequence LSTM version 4.
+  asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+  // Parameters supported by version 1:
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+
+  // Parameters supported by version 2:
+  // If true then first dimension is sequence, otherwise batch.
+  // Version 1 implementations assumed time_major to be true, so this default
+  // value should never change.
+  time_major: bool = true;
+
+  // Parameters for version 3 or above.
+  asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+  new_height: int (deprecated);
+  new_width: int (deprecated);
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+  // The subgraph index that needs to be called.
+  subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+  new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+  ngram_size: int;
+  max_skip_size: int;
+  include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+  block_size: int;
+}
+
+table DepthToSpaceOptions {
+  block_size: int;
+}
+
+table SubOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 5
+  pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+  SUM = 0,
+  MEAN = 1,
+  SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+  combiner:CombinerType;
+}
+
+table GatherOptions {
+  axis: int;
+  // Parameters for Gather version 5 or above.
+  batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+  keep_dims: bool;
+}
+
+table SqueezeOptions {
+  squeeze_dims:[int];
+}
+
+table SplitOptions {
+  num_splits: int;
+}
+
+table SplitVOptions {
+  num_splits: int;
+}
+
+table StridedSliceOptions {
+  begin_mask: int;
+  end_mask: int;
+  ellipsis_mask: int;
+  new_axis_mask: int;
+  shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+  in_data_type: TensorType;
+  out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+  output_type : TensorType;
+}
+
+table ArgMinOptions {
+  output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+  validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+  // Optional output type of the operation (int32 or int64). Defaults to int32.
+  out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+  // Parameters supported by version 1:
+  min:float;
+  max:float;
+  num_bits:int;
+
+  // Parameters supported by version 2:
+  narrow_range:bool;
+}
+
+table PackOptions {
+  values_count:int;
+  axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+  axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+  num:int;
+  axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+  alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+  // Doesn't include borders.
+  REFLECT = 0,
+  // Includes borders.
+  SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+  mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+  idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+  seq_dim:int;
+  batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+  then_subgraph_index:int;
+  else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+  init_subgraph_index:int;
+}
+
+table WhileOptions {
+  cond_subgraph_index:int;
+  body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+  adjoint_lhs:bool;
+  adjoint_rhs:bool;
+  // Parameters for BatchMatMul version 4 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+  exclusive:bool;
+  reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+  // The identity of hash tables. This identity will be used across different
+  // subgraphs in the same interpreter instance.
+  table_id:int;
+  key_dtype:TensorType;
+  value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+  container:string;
+  shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+  seed: int;
+  seed2: int;
+}
+
+table BCQGatherOptions {
+  input_hidden_size: int;
+  axis: int;
+}
+
+table BCQFullyConnectedOptions {
+  weights_hidden_size: int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table InstanceNormOptions {
+  epsilon:float;
+  fused_activation_function:ActivationFunctionType;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field has less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  deprecated_builtin_code:byte;
+  custom_code:string;
+
+  // The version of the operator. The version need to be bumped whenever new
+  // parameters are introduced into an op.
+  version:int = 1;
+
+  // This field is introduced for resolving op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // has greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+  FLEXBUFFERS = 0,
+}
+
+enum DataFormat : byte {
+  // For 2D data, NHWC(batch, height, width, channels)
+  // For 3D data, NDHWC(batch, depth, height, width, channels)
+  CHANNELS_LAST = 0,
+  // For 2D data, NCHW(batch, channels, height, width)
+  // For 3D data, NCDHW(batch, channels, depth, height, width)
+  CHANNELS_FIRST = 1,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operation are configured using builtin_options
+// or custom_options.
+table Operator {
+  // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+  opcode_index:uint;
+
+  // Optional input are indicated by -1.
+  inputs:[int];
+  outputs:[int];
+
+  builtin_options:BuiltinOptions;
+  custom_options:[ubyte];
+  custom_options_format:CustomOptionsFormat;
+
+  // A list of booleans indicating the input tensors which are being mutated by
+  // this operator (e.g. used by RNN and LSTM).
+  // For example, if the "inputs" array refers to 5 tensors and the second and
+  // fifth are mutable variables, then this list will contain
+  // [false, true, false, false, true].
+  //
+  // If the list is empty, no variable is mutated in this operator.
+  // The list either has the same length as `inputs`, or is empty.
+  mutating_variable_inputs:[bool];
+
+  // A list of indices to the subgraph's "tensors" that are internal to an Op.
+  // Internal tensors are those that do not flow in or out of the operation,
+  // but instead are part of internal computation. As such, the operation's
+  // implementation may manage its memory more efficiently. They are needed
+  // however (i.e. not just an implementation detail) since they are part of the
+  // computation, which may require relevant metadata such as quantization
+  // parameters.
+  intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+  // A list of all tensors used in this subgraph.
+  tensors:[Tensor];
+
+  // Indices of the tensors that are inputs into this subgraph. Note this is
+  // the list of non-static tensors that feed into the subgraph for inference.
+  inputs:[int];
+
+  // Indices of the tensors that are outputs out of this subgraph. Note this is
+  // the list of output tensors that are considered the product of the
+  // subgraph's inference.
+  outputs:[int];
+
+  // All operators, in execution order.
+  operators:[Operator];
+
+  // Name of this subgraph (used for debugging).
+  name:string;
+
+  // Data format for input/output of SubGraph
+  data_format: DataFormat;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+  data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+  // A human readable string to uniquely identify a Metadata.
+  name:string;
+  // An index to the buffers table.
+  buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+  // Represents the alias to use for this tensor.
+  name:string;
+
+  // The actual tensor index in the primary graph, that 'name' corresponds to.
+  tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+  // Named inputs for this signature.
+  inputs:[TensorMap];
+
+  // Named outputs for this signature.
+  outputs:[TensorMap];
+
+  // Key value which was in the Tensorflow SavedModel SignatureDef map.
+  signature_key:string;
+
+  // Model tag, deprecated.
+  deprecated_tag:string (deprecated);
+
+  // Index of subgraphs that corresponds to the exported method.
+  subgraph_index:uint;
+}
+
+table Model {
+  // Version of the schema.
+  version:uint;
+
+  // A list of all operator codes used in this model. This is
+  // kept in order because operators carry an index into this
+  // vector.
+  operator_codes:[OperatorCode];
+
+  // All the subgraphs of the model. The 0th is assumed to be the main
+  // model.
+  subgraphs:[SubGraph];
+
+  // A description of the model.
+  description:string;
+
+  // Buffers of the model.
+  // Note the 0th entry of this array must be an empty buffer (sentinel).
+  // This is a convention so that tensors without a buffer can provide 0 as
+  // their buffer.
+  buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+  // Deprecated, prefer to use metadata field.
+  metadata_buffer:[int];
+
+  // Metadata about the model.
+  metadata:[Metadata];
+
+  // Optional SignatureDefs for the model.
+  signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py b/res/PyTorchExamples/examples/BatchToSpaceND/__init__.py
new file mode 100644 (file)
index 0000000..cecc9c8
--- /dev/null
@@ -0,0 +1,49 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model equivalent to tensorflow batch_to_space, but with channels first layout
+class net_BatchToSpaceND(nn.Module):
+    def __init__(self, block_shape, crop):
+        super().__init__()
+        self.block_shape = block_shape
+        self.crop = crop
+
+    def forward(self, input):
+        # Prepare attributes
+        input_shape = list(map(int, list(input.shape)))
+        block_shape = self.block_shape
+        crop = self.crop
+
+        # number of spatial dimensions
+        m = len(block_shape)
+        # rest of dimensions
+        n = len(input.shape) - m
+        # output batch size
+        batch_size = input_shape[0] // np.prod(block_shape)
+
+        unfolded_shape = list(block_shape) + [batch_size] + input_shape[1:]
+        fold_shape = [batch_size] + input_shape[1:n] + [
+            input_shape[i + n] * block_shape[i] for i in range(m)
+        ]
+        permute_dims = list(range(
+            m, m + n)) + [i + mod for i in range(m) for mod in [n + m, 0]]
+
+        # Actual model starts here
+        unfolded_input = input.reshape(unfolded_shape)
+        permuted = torch.permute(unfolded_input, permute_dims)
+        full_output = permuted.reshape(fold_shape)
+        # crop output tensor
+        crop_output = full_output
+        for i in range(m):
+            crop_size = sum(crop[i])
+            crop_output = crop_output.narrow(i + n, crop[i][0],
+                                             fold_shape[i + n] - crop_size)
+        return crop_output
+
+
+_model_ = net_BatchToSpaceND([2, 2], [[1, 0], [0, 1]])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(8, 4, 3, 3)
diff --git a/res/PyTorchExamples/examples/Conv2d-pad/__init__.py b/res/PyTorchExamples/examples/Conv2d-pad/__init__.py
new file mode 100644 (file)
index 0000000..4c2b45e
--- /dev/null
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_Conv2d(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.Conv2d(1, 1, 1, padding=(1, 0))
+
+    def forward(self, input):
+        return self.op(input)
+
+
+_model_ = net_Conv2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 1, 5, 17)
diff --git a/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py b/res/PyTorchExamples/examples/Conv2d-yuv2rgb/__init__.py
new file mode 100644 (file)
index 0000000..235015c
--- /dev/null
@@ -0,0 +1,24 @@
+import torch
+import torch.nn as nn
+
+
+# model representing YUVtoRGB conversion
+# for details see https://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
+class net_Conv2dYUVtoRGB(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.Conv2d(3, 3, 1, bias=False)
+        raw_weights = [[1.0, 0.0, 1.13983], \
+                       [1.0, -0.39465, -0.58060], \
+                       [1.0, 2.03211, 0.0]]
+        weights = torch.Tensor(raw_weights).reshape(3, 3, 1, 1)
+        self.op.weight = torch.nn.Parameter(weights, requires_grad=False)
+
+    def forward(self, input):
+        return torch.clamp(self.op(input), 0.0, 1.0)
+
+
+_model_ = net_Conv2dYUVtoRGB()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 3, 4, 4)
diff --git a/res/PyTorchExamples/examples/LSTM-bi/__init__.py b/res/PyTorchExamples/examples/LSTM-bi/__init__.py
new file mode 100644 (file)
index 0000000..6f5cea4
--- /dev/null
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 5
+_batch_size = 3
+_input_size = 10
+_hidden_size = 20
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.LSTM(_input_size, _hidden_size, _number_layers, bidirectional=True)
+
+    def forward(self, inputs):
+        return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [
+    torch.randn(_seq_length, _batch_size, _input_size),
+    torch.randn(_number_layers * 2, _batch_size, _hidden_size),
+    torch.randn(_number_layers * 2, _batch_size, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/LSTM-nobias/__init__.py b/res/PyTorchExamples/examples/LSTM-nobias/__init__.py
new file mode 100644 (file)
index 0000000..d64704a
--- /dev/null
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 2
+_batch_size = 5
+_input_size = 15
+_hidden_size = 10
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.LSTM(_input_size, _hidden_size, _number_layers, bias=False)
+
+    def forward(self, inputs):
+        return self.op(inputs[0], (inputs[1], inputs[2]))
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = [
+    torch.randn(_seq_length, _batch_size, _input_size),
+    torch.randn(_number_layers, _batch_size, _hidden_size),
+    torch.randn(_number_layers, _batch_size, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/LSTM-noinit/__init__.py b/res/PyTorchExamples/examples/LSTM-noinit/__init__.py
new file mode 100644 (file)
index 0000000..7aa79d6
--- /dev/null
@@ -0,0 +1,24 @@
+import torch
+import torch.nn as nn
+
+_seq_length = 1
+_batch_size = 5
+_input_size = 8
+_hidden_size = 10
+_number_layers = 1
+
+
+# model
+class net_LSTM(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.LSTM(_input_size, _hidden_size, _number_layers)
+
+    def forward(self, input):
+        return self.op(input)
+
+
+_model_ = net_LSTM()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(_seq_length, _batch_size, _input_size)
diff --git a/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py b/res/PyTorchExamples/examples/MaxPool2d-am/__init__.py
new file mode 100644 (file)
index 0000000..4225cd6
--- /dev/null
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_MaxPool2d(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.MaxPool2d(3, stride=1, return_indices=True)
+
+    def forward(self, input):
+        return self.op(input)
+
+
+_model_ = net_MaxPool2d()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 4, 4)
diff --git a/res/PyTorchExamples/examples/PixelShuffle/__init__.py b/res/PyTorchExamples/examples/PixelShuffle/__init__.py
new file mode 100644 (file)
index 0000000..14374ce
--- /dev/null
@@ -0,0 +1,18 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_PixelShuffle(nn.Module):
+    def __init__(self, upscale_factor):
+        super().__init__()
+        self.op = torch.nn.PixelShuffle(upscale_factor)
+
+    def forward(self, input):
+        return self.op(input)
+
+
+_model_ = net_PixelShuffle(2)
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 8, 3, 3)
diff --git a/res/PyTorchExamples/examples/RNN-bi/__init__.py b/res/PyTorchExamples/examples/RNN-bi/__init__.py
new file mode 100644 (file)
index 0000000..86f6e4f
--- /dev/null
@@ -0,0 +1,27 @@
+import torch
+import torch.nn as nn
+
+_input_size = 3
+_seq_len = 2
+_batch = 2
+_hidden_size = 5
+_num_layers = 2
+
+
+# model
+class net_RNN(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.RNN(_input_size, _hidden_size, _num_layers, bidirectional=True)
+
+    def forward(self, inputs):
+        return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+    torch.randn(_seq_len, _batch, _input_size),
+    torch.randn(2 * _num_layers, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN-nobias/__init__.py b/res/PyTorchExamples/examples/RNN-nobias/__init__.py
new file mode 100644 (file)
index 0000000..a6a3148
--- /dev/null
@@ -0,0 +1,26 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.RNN(_input_size, _hidden_size, 1, bias=False)
+
+    def forward(self, inputs):
+        return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+    torch.randn(_seq_len, _batch, _input_size),
+    torch.randn(1, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/RNN-noinit/__init__.py b/res/PyTorchExamples/examples/RNN-noinit/__init__.py
new file mode 100644 (file)
index 0000000..492c2d0
--- /dev/null
@@ -0,0 +1,23 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.RNN(_input_size, _hidden_size, 1)
+
+    def forward(self, input):
+        return self.op(input)
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(_seq_len, _batch, _input_size)
diff --git a/res/PyTorchExamples/examples/RNN-relu/__init__.py b/res/PyTorchExamples/examples/RNN-relu/__init__.py
new file mode 100644 (file)
index 0000000..c59c421
--- /dev/null
@@ -0,0 +1,26 @@
+import torch
+import torch.nn as nn
+
+_input_size = 4
+_seq_len = 2
+_batch = 3
+_hidden_size = 3
+
+
+# model
+class net_RNN(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.op = nn.RNN(_input_size, _hidden_size, 1, nonlinearity='relu')
+
+    def forward(self, inputs):
+        return self.op(inputs[0], inputs[1])
+
+
+_model_ = net_RNN()
+
+# dummy input for onnx generation
+_dummy_ = [
+    torch.randn(_seq_len, _batch, _input_size),
+    torch.randn(1, _batch, _hidden_size)
+]
diff --git a/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py b/res/PyTorchExamples/examples/SpaceToBatchND/__init__.py
new file mode 100644 (file)
index 0000000..78d57fd
--- /dev/null
@@ -0,0 +1,49 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model equivalent to tensorflow space_to_batch, but with channels first layout
+class net_SpaceToBatchND(nn.Module):
+    def __init__(self, block_shape, pad):
+        super().__init__()
+        self.block_shape = block_shape
+        self.pad = pad
+
+    def forward(self, input):
+        # Prepare attributes
+        input_shape = list(map(int, list(input.shape)))
+        block_shape = self.block_shape
+        pad = self.pad
+
+        # number of spatial dimensions
+        m = len(block_shape)
+        # rest of dimensions
+        n = len(input.shape) - m
+        # output batch size
+        batch_size = input_shape[0]
+
+        out_spatial_dim = [
+            (input_shape[i + n] + pad[i * 2] + pad[i * 2 + 1]) // block_shape[i]
+            for i in range(m)
+        ]
+        unfolded_shape = [batch_size] + input_shape[1:n] + [
+            dim for i in range(m) for dim in [out_spatial_dim[i], block_shape[i]]
+        ]
+        fold_shape = [batch_size * np.prod(block_shape)
+                      ] + input_shape[1:n] + out_spatial_dim
+        permute_dims = list(range(n + 1, n + 2 * m, 2)) + list(range(n)) + list(
+            range(n, n + 2 * m, 2))
+
+        # Actual model starts here
+        padded_input = torch.nn.functional.pad(input, pad)
+        unfolded_input = padded_input.reshape(unfolded_shape)
+        permuted = torch.permute(unfolded_input, permute_dims)
+        output = permuted.reshape(fold_shape)
+        return output
+
+
+_model_ = net_SpaceToBatchND([2, 2], [1, 0, 0, 1])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(2, 4, 5, 5)
diff --git a/res/PyTorchExamples/examples/SpaceToDepth/__init__.py b/res/PyTorchExamples/examples/SpaceToDepth/__init__.py
new file mode 100644 (file)
index 0000000..62b225d
--- /dev/null
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+import numpy as np
+
+
+# model, equivalent to torch.pixel_unshuffle from torch 1.9+
+class net_SpaceToDepth(nn.Module):
+    def __init__(self, block_size):
+        super().__init__()
+        self.block_size = block_size
+
+    def forward(self, input):
+        # Prepare attributes
+        b_size = self.block_size
+        batch, input_c, input_h, input_w = list(map(int, list(input.shape)))
+        out_c = input_c * b_size * b_size
+        out_h = input_h // b_size
+        out_w = input_w // b_size
+
+        # Actual model starts here
+        x = input.reshape(batch, input_c, out_h, b_size, out_w, b_size)
+        x = x.permute([0, 1, 3, 5, 2, 4])
+        x = x.reshape([batch, out_c, out_h, out_w])
+        return x
+
+
+_model_ = net_SpaceToDepth(2)
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 6, 6)
diff --git a/res/PyTorchExamples/examples/clamp/__init__.py b/res/PyTorchExamples/examples/clamp/__init__.py
new file mode 100644 (file)
index 0000000..92b7286
--- /dev/null
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_clamp(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input):
+        return torch.clamp(input, 0, 10)
+
+
+_model_ = net_clamp()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/interpolate/__init__.py b/res/PyTorchExamples/examples/interpolate/__init__.py
new file mode 100644 (file)
index 0000000..ba0da42
--- /dev/null
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+
+
+# model
+#
+# Notes:
+# - This operation requires a minimum ONNX opset version of 11
+# - tf_onnx 1.9 fails to convert this model using opset version 13+, because the unsqueeze operation is not supported yet
+class net_interpolate(nn.Module):
+    def __init__(self, scale_factor):
+        super().__init__()
+        self.scale_factor = scale_factor
+
+    def forward(self, input):
+        return torch.nn.functional.interpolate(
+            input,
+            scale_factor=self.scale_factor,
+            mode='bilinear',
+            align_corners=True,
+            recompute_scale_factor=True)
+
+    def onnx_opset_version(self):
+        return 11
+
+
+_model_ = net_interpolate([2, 2])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/normalize/__init__.py b/res/PyTorchExamples/examples/normalize/__init__.py
new file mode 100644 (file)
index 0000000..288353a
--- /dev/null
@@ -0,0 +1,17 @@
+import torch
+import torch.nn as nn
+
+
+# model
+class net_normalize(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input):
+        return torch.nn.functional.normalize(input, p=2.0, dim=3, eps=1e-12)
+
+
+_model_ = net_normalize()
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 2, 3, 3)
diff --git a/res/PyTorchExamples/examples/strided_slice/__init__.py b/res/PyTorchExamples/examples/strided_slice/__init__.py
new file mode 100644 (file)
index 0000000..7277da8
--- /dev/null
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+
+
+# model
+#
+# Notes:
+# - This model requires opset version 10+. Previous versions do not support strides.
+class net_strided_slice(nn.Module):
+    def __init__(self, begin, end, stride):
+        super().__init__()
+        self.key = [slice(begin[i], end[i], stride[i]) for i in range(len(begin))]
+
+    def forward(self, input):
+        # this is general way to do input[:, :, 1:5:2, 0:5:2]
+        return input[self.key]
+
+    def onnx_opset_version(self):
+        return 10
+
+
+_model_ = net_strided_slice([0, 0, 1, 0], [1, 3, 5, 5], [1, 1, 2, 2])
+
+# dummy input for onnx generation
+_dummy_ = torch.randn(1, 3, 5, 5)
index f205bac7673ff4f974cc86f937e930acf8b2d965..b6fb43887e765a83a8076fa2d890eb88b9ef1161 100755 (executable)
@@ -48,8 +48,16 @@ for example in args.examples:
     torch.save(module._model_, output_folder + example + ".pth")
     print("Generate '" + example + ".pth' - Done")
 
+    opset_version = 9
+    if hasattr(module._model_, 'onnx_opset_version'):
+        opset_version = module._model_.onnx_opset_version()
+
     torch.onnx.export(
-        module._model_, module._dummy_, output_folder + example + ".onnx", verbose=True)
+        module._model_,
+        module._dummy_,
+        output_folder + example + ".onnx",
+        verbose=True,
+        opset_version=opset_version)
     print("Generate '" + example + ".onnx' - Done")
 
     onnx_model = onnx.load(output_folder + example + ".onnx")
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe b/res/TensorFlowLiteRecipes/FullyConnected_006/test.recipe
new file mode 100644 (file)
index 0000000..b5f329b
--- /dev/null
@@ -0,0 +1,29 @@
+operand {
+  name: "in"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 4 }
+}
+operand {
+  name: "weight"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "out"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 2 }
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+    keep_num_dims: true
+  }
+  input: "in"
+  input: "weight"
+  input: ""
+  output: "out"
+}
+input: "in"
+input: "weight"
+output: "out"
diff --git a/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse b/res/TensorFlowLiteRecipes/FullyConnected_006/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
index 4c6c99da691921259d79be04a559b863ddaa88c3..b9b2412cfecf54fe81c6616bfe455b0aa576390a 100644 (file)
@@ -24,5 +24,4 @@ operation {
   output: "ofm"
 }
 input: "param"
-input: "indices"
 output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gather_001/test.recipe b/res/TensorFlowLiteRecipes/Gather_001/test.recipe
new file mode 100644 (file)
index 0000000..cc23cf1
--- /dev/null
@@ -0,0 +1,27 @@
+operand {
+  name: "param"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 3 dim: 4 }
+}
+operand {
+  name: "indices"
+  type: INT32
+  shape { dim: 4 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 3 dim: 4 }
+}
+operation {
+  type: "Gather"
+  gather_options {
+    axis: 3
+  }
+  input: "param"
+  input: "indices"
+  output: "ofm"
+}
+input: "param"
+input: "indices"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Gather_001/test.reverse b/res/TensorFlowLiteRecipes/Gather_001/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
index fa7fa7df74660ca98ab5844965717719724bf748..c5d3872938678926e45c325be14efa00b85de255 100644 (file)
@@ -32,6 +32,7 @@ operand {
   name: "quantize"
   type: UINT8
   shape { dim: 1 dim: 16 dim: 16 dim: 8 }
+  quant { scale: 1 zero_point: 128 }
 }
 operand {
   name: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe b/res/TensorFlowLiteRecipes/Net_Gather_SparseToDense_AddV2_000/test.recipe
new file mode 100644 (file)
index 0000000..804d293
--- /dev/null
@@ -0,0 +1,131 @@
+operand {
+  name: "param_gather"
+  type: INT64
+  shape { dim: 3 }
+  filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+  name: "indices_gather"
+  type: INT64
+  shape { dim: 1 }
+  filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+  name: "ofm_gather"
+  type: INT64
+  shape { dim: 1 }
+}
+operand {
+  name: "shape_sparse"
+  type: INT64
+  shape { dim: 1 dim: 1 }
+  filler {
+      tag: "explicit"
+      arg: "3" arg: "5"
+  }
+}
+operand {
+  name: "values_sparse"
+  type: INT64
+  shape { dim: 1 }
+  filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+  name: "default_value_sparse"
+  type: INT64
+  shape {  }
+  filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+  name: "ofm_sparse"
+  type: INT64
+  shape { dim: 3 }
+}
+operand {
+  name: "add_v2_2"
+  type: INT64
+  shape { dim: 3 }
+  filler { tag: "explicit" arg: "1" arg: "2" }
+}
+operand {
+  name: "ofm_add_v2"
+  type: INT64
+  shape { dim: 3 }
+}
+operand {
+  name: "ofm_cast"
+  type: INT32
+  shape { dim: 3 }
+}
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 2 dim: 5 }
+}
+operand {
+  name: "perm"
+  type: INT32
+  shape { dim: 3 }
+  filler { tag: "explicit" arg: "0" arg: "2" arg: "1" }
+}
+operand {
+  name: "ofm_trans"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 2 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 10 }
+}
+operation {
+  type: "Gather"
+  gather_options {
+    axis: 0
+  }
+  input: "param_gather"
+  input: "indices_gather"
+  output: "ofm_gather"
+}
+operation {
+  type: "SparseToDense"
+  sparse_to_dense_options {
+    validate_indices: false
+  }
+  input: "shape_sparse"
+  input: "values_sparse"
+  input: "ofm_gather"
+  input: "default_value_sparse"
+  output: "ofm_sparse"
+}
+operation {
+  type: "AddV2"
+  input: "ofm_sparse"
+  input: "add_v2_2"
+  output: "ofm_add_v2"
+}
+operation {
+  type: "Cast"
+  cast_options {
+    in_data_type: INT64
+    out_data_type: INT32
+  }
+  input: "ofm_add_v2"
+  output: "ofm_cast"
+}
+operation {
+  type: "Transpose"
+  transpose_options {
+  }
+  input: "ifm"
+  input: "perm"
+  output: "ofm_trans"
+}
+operation {
+  type: "Reshape"
+  input: "ofm_trans"
+  input: "ofm_cast"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Add_SVDF_000/test.recipe
new file mode 100644 (file)
index 0000000..d357a05
--- /dev/null
@@ -0,0 +1,82 @@
+operand {
+  name: "ifm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 16 }
+}
+operand {
+  name: "ifm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 }
+}
+operand {
+  name: "weight_feature"
+  type: FLOAT32
+  shape { dim: 64 dim: 16 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "weight_time"
+  type: FLOAT32
+  shape { dim: 64 dim: 8 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "input_activation_state"
+  type: FLOAT32
+  is_variable: true
+  shape { dim: 1 dim: 512 }
+}
+operand {
+  name: "svdf"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 }
+}
+operation {
+  type: "SVDF"
+  svdf_options {
+    rank: 1
+    activation: RELU
+    asymmetric_quantize_inputs: false
+  }
+  input: "ifm1"
+  input: "weight_feature"
+  input: "weight_time"
+  input: "bias"
+  input: "input_activation_state"
+  output: "svdf"
+}
+operation {
+  type: "Add"
+  add_options {
+    activation: NONE
+  }
+  input: "svdf"
+  input: "ifm2"
+  output: "ofm"
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Mul_Sqrt_FC_nobias_000/test.recipe
new file mode 100644 (file)
index 0000000..a712d2a
--- /dev/null
@@ -0,0 +1,63 @@
+operand {
+  name: "in1"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "in2"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "mul"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "weight"
+  type: FLOAT32
+  shape { dim: 4 dim: 4 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "sqrtout"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operand {
+  name: "fcout"
+  type: FLOAT32
+  shape { dim: 2 dim: 4 }
+}
+operation {
+  type: "Mul"
+  input: "in1"
+  input: "in2"
+  output: "mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Sqrt"
+  input: "mul"
+  output: "sqrtout"
+}
+operation {
+  type: "FullyConnected"
+  fullyconnected_options {
+    activation: NONE
+  }
+  input: "mul"
+  input: "weight"
+  input: ""
+  output: "fcout"
+}
+input: "in1"
+input: "in2"
+output: "fcout"
+output: "sqrtout"
diff --git a/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Part_Split_Add_000/test.recipe
new file mode 100644 (file)
index 0000000..1d20443
--- /dev/null
@@ -0,0 +1,47 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+  name: "split_dim"
+  type: INT32
+  shape { }
+  filler { tag: "explicit" arg: "0" }
+}
+operand {
+  name: "split1"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+  name: "split2"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+  type: "Split"
+  split_options {
+    num_splits: 2
+  }
+  input: "split_dim"
+  input: "ifm"
+  output: "split1"
+  output: "split2"
+}
+operation {
+  type: "Add"
+  input: "split1"
+  input: "split2"
+  output: "ofm"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Add_000/test.recipe
new file mode 100644 (file)
index 0000000..5c15092
--- /dev/null
@@ -0,0 +1,36 @@
+operand {
+  name: "ifm"
+  type: UINT8
+  shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+  quant { scale: 1.0 zero_point: 0 }
+}
+operand {
+  name: "add_const"
+  type: UINT8
+  shape { dim: 1 dim: 1 dim: 1 dim: 4 }
+  quant { scale: 1.0 zero_point: 0 }
+  filler {
+    tag: "explicit"
+    arg: "0"
+    arg: "1"
+    arg: "2"
+    arg: "3"
+  }
+}
+operand {
+  name: "ofm"
+  type: UINT8
+  shape { dim: 1 dim: 4 dim: 4 dim: 4 }
+  quant { scale: 1.0 zero_point: 0 }
+}
+operation {
+  type: "Add"
+  input: "ifm"
+  input: "add_const"
+  output: "ofm"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Add_000/test.rule
new file mode 100644 (file)
index 0000000..7bde662
--- /dev/null
@@ -0,0 +1,10 @@
+# To check fake quantization.
+# All Ops are float32. Quantize/Dequantize Ops are inserted at the beginning/end of the model.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "IFM_FP32"              $(tensor_dtype ifm) '=' FLOAT32
+RULE    "ADD_CONST_FP32"        $(tensor_dtype add_const_DQ) '=' FLOAT32
+RULE    "ADD_FP32"              $(tensor_dtype ofm) '=' FLOAT32
+RULE    "QUANTIZE_OP"           $(op_count QUANTIZE) '=' 2
+RULE    "DEQUANTIZE_OP"         $(op_count DEQUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.qconf.json
new file mode 100644 (file)
index 0000000..536fef2
--- /dev/null
@@ -0,0 +1,11 @@
+{
+    "default_quantization_dtype" : "uint8",
+    "default_granularity" : "channel",
+    "layers" : [
+        {
+            "name" : "ofm_conv",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.recipe
new file mode 100644 (file)
index 0000000..3a3dba4
--- /dev/null
@@ -0,0 +1,92 @@
+operand {
+  name: "ifm_conv"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+  name: "filter"
+  type: FLOAT32
+  shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_conv"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+  name: "mul_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "add_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_mul"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+  name: "ofm_add"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 2
+    stride_h: 2
+  }
+  input: "ifm_conv"
+  input: "filter"
+  input: "bias"
+  output: "ofm_conv"
+}
+operation {
+  type: "Mul"
+  input: "ofm_conv"
+  input: "mul_const"
+  output: "ofm_mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Add"
+  input: "ofm_mul"
+  input: "add_const"
+  output: "ofm_add"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_000/test.rule
new file mode 100644 (file)
index 0000000..9124055
--- /dev/null
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization.
+# Conv is int16; the others are uint8. Quantize Ops are inserted before/after Conv.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "CONV_INT16"              $(tensor_dtype ofm_conv) '=' INT16
+RULE    "WEIGHTS_INT16"           $(tensor_dtype filter) '=' INT16
+RULE    "BIAS_INT64"              $(tensor_dtype bias) '=' INT64
+RULE    "MUL_U8"                  $(tensor_dtype ofm_mul) '=' UINT8
+RULE    "ADD_U8"                  $(tensor_dtype ofm_add) '=' UINT8
+RULE    "QUANTIZE_OP"             $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.qconf.json
new file mode 100644 (file)
index 0000000..824f079
--- /dev/null
@@ -0,0 +1,16 @@
+{
+    "default_quantization_dtype" : "uint8",
+    "default_granularity" : "channel",
+    "layers" : [
+        {
+            "name" : "ofm_conv",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "ofm_mul",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.recipe
new file mode 100644 (file)
index 0000000..3a3dba4
--- /dev/null
@@ -0,0 +1,92 @@
+operand {
+  name: "ifm_conv"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+  name: "filter"
+  type: FLOAT32
+  shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_conv"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+  name: "mul_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "add_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_mul"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+  name: "ofm_add"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 2
+    stride_h: 2
+  }
+  input: "ifm_conv"
+  input: "filter"
+  input: "bias"
+  output: "ofm_conv"
+}
+operation {
+  type: "Mul"
+  input: "ofm_conv"
+  input: "mul_const"
+  output: "ofm_mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Add"
+  input: "ofm_mul"
+  input: "add_const"
+  output: "ofm_add"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm_conv"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_001/test.rule
new file mode 100644 (file)
index 0000000..7df910a
--- /dev/null
@@ -0,0 +1,14 @@
+# To check mixed-precision quantization.
+# Conv, Mul: int16, Add: u8
+# Quantize Ops are inserted before Conv and after Mul.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "CONV_INT16"              $(tensor_dtype ofm_conv) '=' INT16
+RULE    "WEIGHTS_INT16"           $(tensor_dtype filter) '=' INT16
+RULE    "BIAS_INT64"              $(tensor_dtype bias) '=' INT64
+RULE    "MUL_INT16"               $(tensor_dtype ofm_mul) '=' INT16
+RULE    "MUL_CONST_INT16"         $(tensor_dtype mul_const) '=' INT16
+RULE    "ADD_UINT8"               $(tensor_dtype ofm_add) '=' UINT8
+RULE    "ADD_CONST_UINT8"         $(tensor_dtype add_const) '=' UINT8
+RULE    "QUANTIZE_OP"             $(op_count QUANTIZE) '=' 2
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.qconf.json
new file mode 100644 (file)
index 0000000..824f079
--- /dev/null
@@ -0,0 +1,16 @@
+{
+    "default_quantization_dtype" : "uint8",
+    "default_granularity" : "channel",
+    "layers" : [
+        {
+            "name" : "ofm_conv",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        },
+        {
+            "name" : "ofm_mul",
+            "dtype" : "int16",
+            "granularity" : "channel"
+        }
+    ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.recipe
new file mode 100644 (file)
index 0000000..9e114b3
--- /dev/null
@@ -0,0 +1,88 @@
+operand {
+  name: "ifm_conv"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 dim: 64 dim: 32 }
+}
+operand {
+  name: "filter"
+  type: FLOAT32
+  shape { dim: 64 dim: 1 dim: 1 dim: 32 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_conv"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+  name: "mul_non_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+}
+operand {
+  name: "add_const"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_mul"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operand {
+  name: "ofm_add"
+  type: FLOAT32
+  shape { dim: 1 dim: 32 dim: 32 dim: 64 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 2
+    stride_h: 2
+  }
+  input: "ifm_conv"
+  input: "filter"
+  input: "bias"
+  output: "ofm_conv"
+}
+operation {
+  type: "Mul"
+  input: "ofm_conv"
+  input: "mul_non_const"
+  output: "ofm_mul"
+  mul_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Add"
+  input: "ofm_mul"
+  input: "add_const"
+  output: "ofm_add"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm_conv"
+input: "mul_non_const"
+output: "ofm_add"
diff --git a/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule b/res/TensorFlowLiteRecipes/Quant_Conv_Mul_Add_002/test.rule
new file mode 100644 (file)
index 0000000..b539872
--- /dev/null
@@ -0,0 +1,14 @@
+# To check mixed-precision quantization.
+# Conv, Mul: int16, Add: u8
+# Quantize Ops are inserted before Conv, after Mul, before Mul's non-const input.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "CONV_INT16"              $(tensor_dtype ofm_conv) '=' INT16
+RULE    "WEIGHTS_INT16"           $(tensor_dtype filter) '=' INT16
+RULE    "BIAS_INT64"              $(tensor_dtype bias) '=' INT64
+RULE    "MUL_INT16"               $(tensor_dtype ofm_mul) '=' INT16
+RULE    "MUL_NON_CONST_UINT8"     $(tensor_dtype mul_non_const) '=' UINT8
+RULE    "ADD_UINT8"               $(tensor_dtype ofm_add) '=' UINT8
+RULE    "ADD_CONST_UINT8"         $(tensor_dtype add_const) '=' UINT8
+RULE    "QUANTIZE_OP"             $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.qconf.json
new file mode 100644 (file)
index 0000000..102e05f
--- /dev/null
@@ -0,0 +1,11 @@
+{
+  "default_quantization_dtype" : "uint8",
+  "default_granularity" : "channel",
+  "layers" : [
+    {
+      "name" : "ofm1",
+      "dtype" : "int16",
+      "granularity" : "channel"
+    }
+  ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.recipe
new file mode 100644 (file)
index 0000000..ef79089
--- /dev/null
@@ -0,0 +1,47 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+  name: "split_dim"
+  type: INT32
+  shape { }
+  filler { tag: "explicit" arg: "0" }
+}
+operand {
+  name: "ofm1"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+  name: "ofm2"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+  type: "Split"
+  split_options {
+    num_splits: 2
+  }
+  input: "split_dim"
+  input: "ifm"
+  output: "ofm1"
+  output: "ofm2"
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+  type: "Add"
+  input: "ofm1"
+  input: "ofm2"
+  output: "ofm"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule b/res/TensorFlowLiteRecipes/Quant_Split_Add_000/test.rule
new file mode 100644 (file)
index 0000000..dc1ed87
--- /dev/null
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization for multiple output node.
+# Split: int16, Add: u8
+# Quantize Ops are inserted before Split and after all Split output nodes.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "INPUT_UINT8"             $(tensor_dtype ifm) '=' UINT8
+RULE    "SPLIT_OUT_1_INT16"       $(tensor_dtype ofm1) '=' INT16
+RULE    "SPLIT_OUT_2_INT16"       $(tensor_dtype ofm2) '=' INT16
+RULE    "ADD_UINT8"               $(tensor_dtype ofm) '=' UINT8
+RULE    "QUANTIZE_OP"             $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.qconf.json
new file mode 100644 (file)
index 0000000..272081b
--- /dev/null
@@ -0,0 +1,11 @@
+{
+  "default_quantization_dtype" : "uint8",
+  "default_granularity" : "channel",
+  "layers" : [
+    {
+      "name" : "ofm2",
+      "dtype" : "int16",
+      "granularity" : "channel"
+    }
+  ]
+}
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.recipe
new file mode 100644 (file)
index 0000000..ef79089
--- /dev/null
@@ -0,0 +1,47 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 6 dim: 1 dim: 2 }
+}
+operand {
+  name: "split_dim"
+  type: INT32
+  shape { }
+  filler { tag: "explicit" arg: "0" }
+}
+operand {
+  name: "ofm1"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operand {
+  name: "ofm2"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+  type: "Split"
+  split_options {
+    num_splits: 2
+  }
+  input: "split_dim"
+  input: "ifm"
+  output: "ofm1"
+  output: "ofm2"
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 3 dim: 1 dim: 2 }
+}
+operation {
+  type: "Add"
+  input: "ofm1"
+  input: "ofm2"
+  output: "ofm"
+  add_options {
+    activation: NONE
+  }
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule b/res/TensorFlowLiteRecipes/Quant_Split_Add_001/test.rule
new file mode 100644 (file)
index 0000000..dc1ed87
--- /dev/null
@@ -0,0 +1,11 @@
+# To check mixed-precision quantization for multiple output node.
+# Split: int16, Add: u8
+# Quantize Ops are inserted before Split and after all Split output nodes.
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "INPUT_UINT8"             $(tensor_dtype ifm) '=' UINT8
+RULE    "SPLIT_OUT_1_INT16"       $(tensor_dtype ofm1) '=' INT16
+RULE    "SPLIT_OUT_2_INT16"       $(tensor_dtype ofm2) '=' INT16
+RULE    "ADD_UINT8"               $(tensor_dtype ofm) '=' UINT8
+RULE    "QUANTIZE_OP"             $(op_count QUANTIZE) '=' 3
diff --git a/res/TensorFlowLiteRecipes/Quantize_001/test.recipe b/res/TensorFlowLiteRecipes/Quantize_001/test.recipe
new file mode 100644 (file)
index 0000000..943341b
--- /dev/null
@@ -0,0 +1,66 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2  }
+}
+operand {
+  name: "ker"
+  type: FLOAT32
+  shape { dim: 1 dim: 1 dim: 1 dim: 2 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 1 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "ofm_c"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+}
+operation {
+  type: "Conv2D"
+  conv2d_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+  }
+  input: "ifm"
+  input: "ker"
+  input: "bias"
+  output: "ofm_c"
+}
+operand {
+  name: "ofm_q"
+  type: UINT8
+  shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+  quant { min: 0 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+  type: "Quantize"
+  input: "ofm_c"
+  output: "ofm_q"
+}
+operand {
+  name: "ofm"
+  type: INT16
+  shape { dim: 1 dim: 3 dim: 3 dim: 1 }
+  quant { min: -255 max: 255 scale: 1.0 zero_point: 0 }
+}
+operation {
+  type: "Quantize"
+  input: "ofm_q"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/Quantize_001/test.reverse b/res/TensorFlowLiteRecipes/Quantize_001/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/SVDF_000/test.recipe b/res/TensorFlowLiteRecipes/SVDF_000/test.recipe
new file mode 100644 (file)
index 0000000..cd45f1b
--- /dev/null
@@ -0,0 +1,62 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 16 }
+}
+operand {
+  name: "weight_feature"
+  type: FLOAT32
+  shape { dim: 64 dim: 16 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "weight_time"
+  type: FLOAT32
+  shape { dim: 64 dim: 8 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "bias"
+  type: FLOAT32
+  shape { dim: 64 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "input_activation_state"
+  type: FLOAT32
+  is_variable: true
+  shape { dim: 1 dim: 512 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 }
+}
+operation {
+  type: "SVDF"
+  svdf_options {
+    rank: 1
+    activation: RELU
+    asymmetric_quantize_inputs: false
+  }
+  input: "ifm"
+  input: "weight_feature"
+  input: "weight_time"
+  input: "bias"
+  input: "input_activation_state"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/SVDF_000/test.reverse b/res/TensorFlowLiteRecipes/SVDF_000/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/res/TensorFlowLiteRecipes/SVDF_001/test.recipe b/res/TensorFlowLiteRecipes/SVDF_001/test.recipe
new file mode 100644 (file)
index 0000000..38b76c2
--- /dev/null
@@ -0,0 +1,52 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 16 }
+}
+operand {
+  name: "weight_feature"
+  type: FLOAT32
+  shape { dim: 64 dim: 16 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "weight_time"
+  type: FLOAT32
+  shape { dim: 64 dim: 8 }
+  filler {
+    tag: "gaussian"
+    arg: "0.0"
+    arg: "1.0"
+  }
+}
+operand {
+  name: "input_activation_state"
+  type: FLOAT32
+  is_variable: true
+  shape { dim: 1 dim: 512 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 64 }
+}
+operation {
+  type: "SVDF"
+  svdf_options {
+    rank: 1
+    activation: RELU
+    asymmetric_quantize_inputs: false
+  }
+  input: "ifm"
+  input: "weight_feature"
+  input: "weight_time"
+  input: ""
+  input: "input_activation_state"
+  output: "ofm"
+}
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/SVDF_001/test.reverse b/res/TensorFlowLiteRecipes/SVDF_001/test.reverse
new file mode 100644 (file)
index 0000000..e69de29
index ae993e6d8db1b8dbcbeab7aa130070ee92dd29b9..81e1e56e8de0eb13974e4cccbc88914259bb58f5 100644 (file)
@@ -71,8 +71,7 @@ signature_def {
     name: "ofm1"
     tensor_index: 2
   }
-  method_name: "serving_default"
-  key: "serv"
+  signature_key: "serving_default"
   subgraph_index: 0
 }
 input: "ifm1"
diff --git a/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe b/res/TensorFlowLiteRecipes/SignatureDef_MultiOut_001/test.recipe
new file mode 100644 (file)
index 0000000..a1731f9
--- /dev/null
@@ -0,0 +1,81 @@
+operand {
+  name: "ifm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ifm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm1"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm2"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "ofm3"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+  type: "Add"
+  input: "ifm1"
+  input: "ifm2"
+  output: "ofm1"
+  add_options {
+    activation: NONE
+  }
+}
+operation {
+  type: "Mul"
+  input: "ifm1"
+  input: "ifm2"
+  output: "ofm2"
+  mul_options {
+    activation: 0
+  }
+}
+operation {
+  type: "Sub"
+  input: "ifm1"
+  input: "ifm2"
+  output: "ofm3"
+  sub_options {
+    activation: 0
+  }
+}
+signature_def {
+  inputs: {
+    name: "ifm1"
+    tensor_index: 0
+  }
+  inputs: {
+    name: "ifm2"
+    tensor_index: 1
+  }
+  outputs {
+    name: "out3"
+    tensor_index: 3
+  }
+  outputs {
+    name: "out2"
+    tensor_index: 4
+  }
+  outputs {
+    name: "out1"
+    tensor_index: 2
+  }
+  signature_key: "serving_default"
+  subgraph_index: 0
+}
+input: "ifm1"
+input: "ifm2"
+output: "ofm3"
+output: "ofm1"
+output: "ofm2"
index 1754f9a58c926d71cb64a234ef8347e883d5919d..6d258e73fd826d5e62583d74ff566a2008ab45b2 100644 (file)
@@ -2,7 +2,6 @@ operand {
   name: "ifm"
   type: FLOAT32
   shape { dim: 1 dim: 3 dim: 3 dim: 2 }
-  filler { tag: "constant" arg: "3.5" }
 }
 operand {
   name: "ofm"
diff --git a/res/TensorFlowLiteSchema/2.7.0/schema.fbs b/res/TensorFlowLiteSchema/2.7.0/schema.fbs
new file mode 100644 (file)
index 0000000..3e0b999
--- /dev/null
@@ -0,0 +1,1250 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+//             version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+//             version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+  FLOAT32 = 0,
+  FLOAT16 = 1,
+  INT32 = 2,
+  UINT8 = 3,
+  INT64 = 4,
+  STRING = 5,
+  BOOL = 6,
+  INT16 = 7,
+  COMPLEX64 = 8,
+  INT8 = 9,
+  FLOAT64 = 10,
+  COMPLEX128 = 11,
+  UINT64 = 12,
+  // Experimental: Resource and variant types are experimental, and are subject
+  // to change. Do not implement custom kernels using resource & variant types
+  // now.
+  RESOURCE = 13,
+  VARIANT = 14,
+  UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+  custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+  CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+  // These four parameters are the asymmetric linear quantization parameters.
+  // Given a quantized value q, the corresponding float value f should be:
+  //   f = scale * (q - zero_point)
+  // For other quantization types, the QuantizationDetails below is used.
+  min:[float];  // For importing back into tensorflow.
+  max:[float];  // For importing back into tensorflow.
+  scale:[float];  // For dequantizing the tensor's values.
+  zero_point:[long];
+
+  // If this is not none, the other quantization parameters (i.e. min, max,
+  // scale, zero_point fields above) are ignored and the value of the
+  // QuantizationDetails union should be used.
+  details:QuantizationDetails;
+
+  // Specifies the dimension of the Tensor's shape that the scales and
+  // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+  // with quantization params:
+  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+  // will be quantized across the second dimension of t.
+  //   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+  quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+//   1. In what order to traverse these dimensions. For example, to store a 2-D
+//      matrix in row major order, the traversal order would be (d0, d1),
+//      whereas to store it in column major order, the traversal order would be
+//      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+//      could be (d0, d1, d2, d3).
+//   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+//      tensor dimension in (d0, ..., dn-1).
+//   3. In the traversal order defined above, the format (dense vs. sparse) and
+//      index metadata for each dimension. For a dense dimension, this is just
+//      the size of that dimension. For a sparse dimension, it's the same as
+//      the compressed index defined in the Compressed Sparse Row (CSR) format.
+//      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+//   1. DENSE: each coordinate in this dimension is stored implicitly.
+//   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//      compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+  DENSE = 0,
+  SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+  values:[int];
+}
+
+table Uint16Vector {
+  values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+  values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+  Int32Vector,
+  Uint16Vector,
+  Uint8Vector
+}
+
+table DimensionMetadata {
+  // Whether a dimension is dense or sparse.
+  format:DimensionType;
+  // Index metadata used for a dimension.
+  //   - If format is DimensionType.DENSE then we use the dense_size field to
+  //     store the size of that dimension. Each index in that dimension is
+  //     stored implicitly.
+  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
+  //     array_indices to encode that dimension. array_segments represents how
+  //     to segment the indices array, each segment corresponds to one element
+  //     in the previous dimension. array_indices represents the index of the
+  //     non-zero elements within this dimension (as those in the CSR matrix
+  //     format, where the first array is row pointers and the second array is
+  //     column indices).
+  dense_size:int;
+  array_segments:SparseIndexVector;
+  array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+  // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+  // ..., dn-1),
+  //   - if not block sparse, the traversal_order is just a permutation of (d0,
+  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
+  //     have traversal_order = (d0, d1).
+  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
+  //     traversal_order has n + k elements. The first n elements are still a
+  //     permutation of (d0, ..., dn-1). The last k elements are a permutation
+  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
+  //     would have traversal_order = (d0, d1, d2, d3).
+  traversal_order:[int];
+  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn).
+  // It's stored in the order of (dn, ..., dn+k-1).
+  // If not block-sparse, this field is NULL.
+  block_map:[int];
+  // In the traversal order defined above, the metadata needed for
+  // each dimension to locate the non-zero values in the original dense tensor.
+  // The size of the dim_metadata array = the size of the traversal_order array
+  // = n + k.
+  dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+  // The tensor shape. The meaning of each entry is operator-specific but
+  // builtin ops use: [batch size, height, width, number of channels] (That's
+  // Tensorflow's NHWC).
+  shape:[int];
+  type:TensorType;
+  // An index that refers to the buffers table at the root of the model. Or,
+  // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always existent empty buffer).
+  //
+  // The data_buffer itself is an opaque container, with the assumption that the
+  // target device is little-endian. In addition, all builtin operators assume
+  // the memory is ordered such that if `shape` is [4, 3, 2], then index
+  // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+  buffer:uint;
+  name:string;  // For debugging and importing back into tensorflow.
+  quantization:QuantizationParameters;  // Optional.
+
+  is_variable:bool = false;
+
+  // Parameters to encode a sparse tensor. See the example in
+  // tensorflow/lite/testdata/sparse_tensor.json.
+  sparsity:SparsityParameters;  // Optional.
+
+  // Encodes `shape` with unknown dimensions. Unknown dimensions are
+  // represented with -1.
+  shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+  ADD = 0,
+  AVERAGE_POOL_2D = 1,
+  CONCATENATION = 2,
+  CONV_2D = 3,
+  DEPTHWISE_CONV_2D = 4,
+  DEPTH_TO_SPACE = 5,
+  DEQUANTIZE = 6,
+  EMBEDDING_LOOKUP = 7,
+  FLOOR = 8,
+  FULLY_CONNECTED = 9,
+  HASHTABLE_LOOKUP = 10,
+  L2_NORMALIZATION = 11,
+  L2_POOL_2D = 12,
+  LOCAL_RESPONSE_NORMALIZATION = 13,
+  LOGISTIC = 14,
+  LSH_PROJECTION = 15,
+  LSTM = 16,
+  MAX_POOL_2D = 17,
+  MUL = 18,
+  RELU = 19,
+  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+  // since different model developers use RELU1 in different ways. Never
+  // create another op called RELU1.
+  RELU_N1_TO_1 = 20,
+  RELU6 = 21,
+  RESHAPE = 22,
+  RESIZE_BILINEAR = 23,
+  RNN = 24,
+  SOFTMAX = 25,
+  SPACE_TO_DEPTH = 26,
+  SVDF = 27,
+  TANH = 28,
+  CONCAT_EMBEDDINGS = 29,
+  SKIP_GRAM = 30,
+  CALL = 31,
+  CUSTOM = 32,
+  EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
+  SPACE_TO_BATCH_ND = 38,
+  TRANSPOSE = 39,
+  MEAN = 40,
+  SUB = 41,
+  DIV = 42,
+  SQUEEZE = 43,
+  UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+  STRIDED_SLICE = 45,
+  BIDIRECTIONAL_SEQUENCE_RNN = 46,
+  EXP = 47,
+  TOPK_V2 = 48,
+  SPLIT = 49,
+  LOG_SOFTMAX = 50,
+  // DELEGATE is a special op type for the operations which are delegated to
+  // other backends.
+  // WARNING: Experimental interface, subject to change
+  DELEGATE = 51,
+  BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+  CAST = 53,
+  PRELU = 54,
+  MAXIMUM = 55,
+  ARG_MAX = 56,
+  MINIMUM = 57,
+  LESS = 58,
+  NEG = 59,
+  PADV2 = 60,
+  GREATER = 61,
+  GREATER_EQUAL = 62,
+  LESS_EQUAL = 63,
+  SELECT = 64,
+  SLICE = 65,
+  SIN = 66,
+  TRANSPOSE_CONV = 67,
+  SPARSE_TO_DENSE = 68,
+  TILE = 69,
+  EXPAND_DIMS = 70,
+  EQUAL = 71,
+  NOT_EQUAL = 72,
+  LOG = 73,
+  SUM = 74,
+  SQRT = 75,
+  RSQRT = 76,
+  SHAPE = 77,
+  POW = 78,
+  ARG_MIN = 79,
+  FAKE_QUANT = 80,
+  REDUCE_PROD = 81,
+  REDUCE_MAX = 82,
+  PACK = 83,
+  LOGICAL_OR = 84,
+  ONE_HOT = 85,
+  LOGICAL_AND = 86,
+  LOGICAL_NOT = 87,
+  UNPACK = 88,
+  REDUCE_MIN = 89,
+  FLOOR_DIV = 90,
+  REDUCE_ANY = 91,
+  SQUARE = 92,
+  ZEROS_LIKE = 93,
+  FILL = 94,
+  FLOOR_MOD = 95,
+  RANGE = 96,
+  RESIZE_NEAREST_NEIGHBOR = 97,
+  LEAKY_RELU = 98,
+  SQUARED_DIFFERENCE = 99,
+  MIRROR_PAD = 100,
+  ABS = 101,
+  SPLIT_V = 102,
+  UNIQUE = 103,
+  CEIL = 104,
+  REVERSE_V2 = 105,
+  ADD_N = 106,
+  GATHER_ND = 107,
+  COS = 108,
+  WHERE = 109,
+  RANK = 110,
+  ELU = 111,
+  REVERSE_SEQUENCE = 112,
+  MATRIX_DIAG = 113,
+  QUANTIZE = 114,
+  MATRIX_SET_DIAG = 115,
+  ROUND = 116,
+  HARD_SWISH = 117,
+  IF = 118,
+  WHILE = 119,
+  NON_MAX_SUPPRESSION_V4 = 120,
+  NON_MAX_SUPPRESSION_V5 = 121,
+  SCATTER_ND = 122,
+  SELECT_V2 = 123,
+  DENSIFY = 124,
+  SEGMENT_SUM = 125,
+  BATCH_MATMUL = 126,
+  PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+  CUMSUM = 128,
+  CALL_ONCE = 129,
+  BROADCAST_TO = 130,
+  RFFT2D = 131,
+  CONV_3D = 132,
+  IMAG=133,
+  REAL=134,
+  COMPLEX_ABS=135,
+  HASHTABLE = 136,
+  HASHTABLE_FIND = 137,
+  HASHTABLE_IMPORT = 138,
+  HASHTABLE_SIZE = 139,
+  REDUCE_ALL = 140,
+  CONV_3D_TRANSPOSE = 141,
+  VAR_HANDLE = 142,
+  READ_VARIABLE = 143,
+  ASSIGN_VARIABLE = 144,
+  BROADCAST_ARGS = 145,
+  RANDOM_STANDARD_NORMAL = 146,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+  Conv2DOptions,
+  DepthwiseConv2DOptions,
+  ConcatEmbeddingsOptions,
+  LSHProjectionOptions,
+  Pool2DOptions,
+  SVDFOptions,
+  RNNOptions,
+  FullyConnectedOptions,
+  SoftmaxOptions,
+  ConcatenationOptions,
+  AddOptions,
+  L2NormOptions,
+  LocalResponseNormalizationOptions,
+  LSTMOptions,
+  ResizeBilinearOptions,
+  CallOptions,
+  ReshapeOptions,
+  SkipGramOptions,
+  SpaceToDepthOptions,
+  EmbeddingLookupSparseOptions,
+  MulOptions,
+  PadOptions,
+  GatherOptions,
+  BatchToSpaceNDOptions,
+  SpaceToBatchNDOptions,
+  TransposeOptions,
+  ReducerOptions,
+  SubOptions,
+  DivOptions,
+  SqueezeOptions,
+  SequenceRNNOptions,
+  StridedSliceOptions,
+  ExpOptions,
+  TopKV2Options,
+  SplitOptions,
+  LogSoftmaxOptions,
+  CastOptions,
+  DequantizeOptions,
+  MaximumMinimumOptions,
+  ArgMaxOptions,
+  LessOptions,
+  NegOptions,
+  PadV2Options,
+  GreaterOptions,
+  GreaterEqualOptions,
+  LessEqualOptions,
+  SelectOptions,
+  SliceOptions,
+  TransposeConvOptions,
+  SparseToDenseOptions,
+  TileOptions,
+  ExpandDimsOptions,
+  EqualOptions,
+  NotEqualOptions,
+  ShapeOptions,
+  PowOptions,
+  ArgMinOptions,
+  FakeQuantOptions,
+  PackOptions,
+  LogicalOrOptions,
+  OneHotOptions,
+  LogicalAndOptions,
+  LogicalNotOptions,
+  UnpackOptions,
+  FloorDivOptions,
+  SquareOptions,
+  ZerosLikeOptions,
+  FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
+  RangeOptions,
+  ResizeNearestNeighborOptions,
+  LeakyReluOptions,
+  SquaredDifferenceOptions,
+  MirrorPadOptions,
+  AbsOptions,
+  SplitVOptions,
+  UniqueOptions,
+  ReverseV2Options,
+  AddNOptions,
+  GatherNdOptions,
+  CosOptions,
+  WhereOptions,
+  RankOptions,
+  ReverseSequenceOptions,
+  MatrixDiagOptions,
+  QuantizeOptions,
+  MatrixSetDiagOptions,
+  HardSwishOptions,
+  IfOptions,
+  WhileOptions,
+  DepthToSpaceOptions,
+  NonMaxSuppressionV4Options,
+  NonMaxSuppressionV5Options,
+  ScatterNdOptions,
+  SelectV2Options,
+  DensifyOptions,
+  SegmentSumOptions,
+  BatchMatMulOptions,
+  CumsumOptions,
+  CallOnceOptions,
+  BroadcastToOptions,
+  Rfft2dOptions,
+  Conv3DOptions,
+  HashtableOptions,
+  HashtableFindOptions,
+  HashtableImportOptions,
+  HashtableSizeOptions,
+  VarHandleOptions,
+  ReadVariableOptions,
+  AssignVariableOptions,
+  RandomOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+  NONE = 0,
+  RELU = 1,
+  RELU_N1_TO_1 = 2,
+  RELU6 = 3,
+  TANH = 4,
+  SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+  padding:Padding;
+  stride_d:int;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_d_factor:int = 1;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  filter_width:int;
+  filter_height:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  // See comments in lite/c/builtin_op_data.h for more details.
+  depth_multiplier:int;
+  fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+  num_channels:int;
+  num_columns_per_channel:[int];
+  embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+  UNKNOWN = 0,
+  SPARSE = 1,
+  DENSE = 2,
+}
+
+table LSHProjectionOptions {
+  type: LSHProjectionType;
+}
+
+table SVDFOptions {
+  rank:int;
+  fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for non
+  // constant inputs at evaluation time.
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
+  asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+  DEFAULT = 0,
+  SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+  // Parameters for FullyConnected version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+
+  // Parameters for FullyConnected version 2 or above.
+  weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimension is preserved. Furthermore,
+  // all but the last dimension of the input and output shapes will be equal.
+  keep_num_dims: bool;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+  beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+  axis:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 3.
+  pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+  // This field is currently ignored in the L2 Norm Op.
+  fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+  radius:int;
+  bias:float;
+  alpha:float;
+  beta:float;
+}
+
+enum LSTMKernelType : byte {
+  // Full LSTM kernel which supports peephole and projection.
+  FULL = 0,
+  // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+  BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+  // Parameters for LSTM version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // Parameters for LSTM version 2 or above.
+  // Basic kernel is only supported in version 2 or above.
+  kernel_type: LSTMKernelType = FULL;
+
+  // Parameters for LSTM version 4 or above.
+  asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true then first dimension is sequence, otherwise batch.
+  time_major:bool;
+
+  // Parameter for Unidirectional Sequence LSTM version 4.
+  asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+  // Parameters supported by version 1:
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+
+  // Parameters supported by version 2:
+  // If true then first dimension is sequence, otherwise batch.
+  // Version 1 implementations assumed time_major to be true, so this default
+  // value should never change.
+  time_major: bool = true;
+
+  // Parameters for version 3 or above.
+  asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+  new_height: int (deprecated);
+  new_width: int (deprecated);
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+  // The subgraph index that needs to be called.
+  subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+  new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+  ngram_size: int;
+  max_skip_size: int;
+  include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+  block_size: int;
+}
+
+table DepthToSpaceOptions {
+  block_size: int;
+}
+
+table SubOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 5
+  pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+  SUM = 0,
+  MEAN = 1,
+  SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+  combiner:CombinerType;
+}
+
+table GatherOptions {
+  axis: int;
+  // Parameters for Gather version 5 or above.
+  batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+  keep_dims: bool;
+}
+
+table SqueezeOptions {
+  squeeze_dims:[int];
+}
+
+table SplitOptions {
+  num_splits: int;
+}
+
+table SplitVOptions {
+  num_splits: int;
+}
+
+table StridedSliceOptions {
+  begin_mask: int;
+  end_mask: int;
+  ellipsis_mask: int;
+  new_axis_mask: int;
+  shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+  in_data_type: TensorType;
+  out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+  output_type : TensorType;
+}
+
+table ArgMinOptions {
+  output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+  validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+  // Optional output type of the operation (int32 or int64). Defaults to int32.
+  out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+  // Parameters supported by version 1:
+  min:float;
+  max:float;
+  num_bits:int;
+
+  // Parameters supported by version 2:
+  narrow_range:bool;
+}
+
+table PackOptions {
+  values_count:int;
+  axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+  axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+  num:int;
+  axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+  alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+  // Doesn't include borders.
+  REFLECT = 0,
+  // Includes borders.
+  SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+  mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+  idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+  seq_dim:int;
+  batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+  then_subgraph_index:int;
+  else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+  init_subgraph_index:int;
+}
+
+table WhileOptions {
+  cond_subgraph_index:int;
+  body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+  adj_x:bool;
+  adj_y:bool;
+  // Parameters for BatchMatMul version 4 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+  exclusive:bool;
+  reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+  // The identity of hash tables. This identity will be used across different
+  // subgraphs in the same interpreter instance.
+  table_id:int;
+  key_dtype:TensorType;
+  value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+  container:string;
+  shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+  seed: int;
+  seed2: int;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field is less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  deprecated_builtin_code:byte;
+  custom_code:string;
+
+  // The version of the operator. The version need to be bumped whenever new
+  // parameters are introduced into an op.
+  version:int = 1;
+
+  // This field is introduced for resolving op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+  FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+  // Index into the operator_codes array. Using an integer here avoids
+  // complicate map lookups.
+  opcode_index:uint;
+
+  // Optional input are indicated by -1.
+  inputs:[int];
+  outputs:[int];
+
+  builtin_options:BuiltinOptions;
+  custom_options:[ubyte];
+  custom_options_format:CustomOptionsFormat;
+
+  // A list of booleans indicating the input tensors which are being mutated by
+  // this operator.(e.g. used by RNN and LSTM).
+  // For example, if the "inputs" array refers to 5 tensors and the second and
+  // fifth are mutable variables, then this list will contain
+  // [false, true, false, false, true].
+  //
+  // If the list is empty, no variable is mutated in this operator.
+  // The list either has the same length as `inputs`, or is empty.
+  mutating_variable_inputs:[bool];
+
+  // A list of indices to the subgraph's "tensors" that are internal to an Op.
+  // Internal tensors are those that do not flow in or out of the operation,
+  // but instead are part of internal computation. As such, the operation's
+  // implementation may manage its memory more efficiently. They are needed
+  // however (i.e. not just an implementation detail) since they are part of the
+  // computation, which may require relevant metadata such as quantization
+  // parameters.
+  intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+  // A list of all tensors used in this subgraph.
+  tensors:[Tensor];
+
+  // Indices of the tensors that are inputs into this subgraph. Note this is
+  // the list of non-static tensors that feed into the subgraph for inference.
+  inputs:[int];
+
+  // Indices of the tensors that are outputs out of this subgraph. Note this is
+  // the list of output tensors that are considered the product of the
+  // subgraph's inference.
+  outputs:[int];
+
+  // All operators, in execution order.
+  operators:[Operator];
+
+  // Name of this subgraph (used for debugging).
+  name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+  data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+  // A human readable string to uniquely identify a Metadata.
+  name:string;
+  // An index to the buffers table.
+  buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+  // Represents the alias to use for this tensor.
+  name:string;
+
+  // The actual tensor index in the primary graph, that 'name' corresponds to.
+  tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+  // Named inputs for this signature.
+  inputs:[TensorMap];
+
+  // Named outputs for this signature.
+  outputs:[TensorMap];
+
+  // Key value which was in the Tensorflow SavedModel SignatureDef map.
+  signature_key:string;
+
+  // Model tag, deprecated.
+  deprecated_tag:string (deprecated);
+
+  // Index of subgraphs that corresponds to the exported method.
+  subgraph_index:uint;
+}
+
+table Model {
+  // Version of the schema.
+  version:uint;
+
+  // A list of all operator codes used in this model. This is
+  // kept in order because operators carry an index into this
+  // vector.
+  operator_codes:[OperatorCode];
+
+  // All the subgraphs of the model. The 0th is assumed to be the main
+  // model.
+  subgraphs:[SubGraph];
+
+  // A description of the model.
+  description:string;
+
+  // Buffers of the model.
+  // Note the 0th entry of this array must be an empty buffer (sentinel).
+  // This is a convention so that tensors without a buffer can provide 0 as
+  // their buffer.
+  buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existings buffers list.
+  // Deprecated, prefer to use metadata field.
+  metadata_buffer:[int];
+
+  // Metadata about the model.
+  metadata:[Metadata];
+
+  // Optional SignatureDefs for the model.
+  signature_defs:[SignatureDef];
+}
+
+root_type Model;
diff --git a/res/TensorFlowLiteSchema/2.8.0/schema.fbs b/res/TensorFlowLiteSchema/2.8.0/schema.fbs
new file mode 100644 (file)
index 0000000..af55a26
--- /dev/null
@@ -0,0 +1,1264 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+//             version 3.
+// Version 3b: Rename fields in SignatureDef. Has backward compatibility with
+//             version 3 and 3a.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+  FLOAT32 = 0,
+  FLOAT16 = 1,
+  INT32 = 2,
+  UINT8 = 3,
+  INT64 = 4,
+  STRING = 5,
+  BOOL = 6,
+  INT16 = 7,
+  COMPLEX64 = 8,
+  INT8 = 9,
+  FLOAT64 = 10,
+  COMPLEX128 = 11,
+  UINT64 = 12,
+  // Experimental: Resource and variant types are experimental, that are subject
+  // to change. Do not implement custom kernels using resource & variant types
+  // now.
+  RESOURCE = 13,
+  VARIANT = 14,
+  UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+  custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+  CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+  // These four parameters are the asymmetric linear quantization parameters.
+  // Given a quantized value q, the corresponding float value f should be:
+  //   f = scale * (q - zero_point)
+  // For other quantization types, the QuantizationDetails below is used.
+  min:[float];  // For importing back into tensorflow.
+  max:[float];  // For importing back into tensorflow.
+  scale:[float];  // For dequantizing the tensor's values.
+  zero_point:[long];
+
+  // If this is not none, the other quantization parameters (i.e. min, max,
+  // scale, zero_point fields above) are ignored and the value of the
+  // QuantizationDetails union should be used.
+  details:QuantizationDetails;
+
+  // Specifies the dimension of the Tensor's shape that the scales and
+  // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+  // with quantization params:
+  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+  // will be quantized across the second dimension of t.
+  //   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[1]=2
+  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[2]=3
+  quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+//   1. In what order to traverse these dimensions. For example, to store a 2-D
+//      matrix in row major order, the traversal order would be (d0, d1),
+//      whereas to store it in column major order, the traversal order would be
+//      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+//      could be (d0, d1, d2, d3).
+//   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+//      tensor dimension in (d0, ..., dn-1).
+//   3. In the traversal order defined above, the format (dense vs. sparse) and
+//      index metadata for each dimension. For a dense dimension, this is just
+//      the size of that dimension. For a sparse dimension, it's the same as
+//      the compressed index defined in the Compressed Sparse Row (CSR) format.
+//      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+//   1. DENSE: each coordinate in this dimension is stored implicitly.
+//   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+  //      compression technique is the same as what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+  DENSE = 0,
+  SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+  values:[int];
+}
+
+table Uint16Vector {
+  values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+  values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is an int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+  Int32Vector,
+  Uint16Vector,
+  Uint8Vector
+}
+
+table DimensionMetadata {
+  // Whether a dimension is dense or sparse.
+  format:DimensionType;
+  // Index metadata used for a dimension.
+  //   - If format is DimensionType.DENSE then we use the dense_size field to
+  //     store the size of that dimension. Each index in that dimension is
+  //     stored implicitly.
+  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
+  //     array_indices to encode that dimension. array_segments represents how
+  //     to segment the indices array, each segment corresponds to one element
+  //     in the previous dimension. array_indices represents the index of the
+  //     non-zero elements within this dimension (as those in the CSR matrix
+  //     format, where the first array is row pointers and the second array is
+  //     column indices).
+  dense_size:int;
+  array_segments:SparseIndexVector;
+  array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+  // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+  // ..., dn-1),
+  //   - if not block sparse, the traversal_order is just a permutation of (d0,
+  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
+  //     have traversal_order = (d0, d1).
+  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
+  //     traversal_order has n + k elements. The first n elements are still a
+  //     permutation of (d0, ..., dn-1). The last k elements are a permutation
+  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
+  //     would have traversal_order = (d0, d1, d2, d3).
+  traversal_order:[int];
+  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn).
+  // It's stored in the order of (dn, ..., dn+k-1).
+  // If not block-sparse, this field is NULL.
+  block_map:[int];
+  // In the traversal order defined above, the metadata needed for
+  // each dimension to locate the non-zero values in the original dense tensor.
+  // The size of the dim_metadata array = the size of the traversal_order array
+  // = n + k.
+  dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+  // The tensor shape. The meaning of each entry is operator-specific but
+  // builtin ops use: [batch size, height, width, number of channels] (That's
+  // Tensorflow's NHWC).
+  shape:[int];
+  type:TensorType;
+  // An index that refers to the buffers table at the root of the model. Or,
+  // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always existent empty buffer).
+  //
+  // The data_buffer itself is an opaque container, with the assumption that the
+  // target device is little-endian. In addition, all builtin operators assume
+  // the memory is ordered such that if `shape` is [4, 3, 2], then index
+  // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+  buffer:uint;
+  name:string;  // For debugging and importing back into tensorflow.
+  quantization:QuantizationParameters;  // Optional.
+
+  is_variable:bool = false;
+
+  // Parameters to encode a sparse tensor. See the example in
+  // tensorflow/lite/testdata/sparse_tensor.json.
+  sparsity:SparsityParameters;  // Optional.
+
+  // Encodes `shape` with unknown dimensions. Unknown dimensions are
+  // represented with -1.
+  shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+  ADD = 0,
+  AVERAGE_POOL_2D = 1,
+  CONCATENATION = 2,
+  CONV_2D = 3,
+  DEPTHWISE_CONV_2D = 4,
+  DEPTH_TO_SPACE = 5,
+  DEQUANTIZE = 6,
+  EMBEDDING_LOOKUP = 7,
+  FLOOR = 8,
+  FULLY_CONNECTED = 9,
+  HASHTABLE_LOOKUP = 10,
+  L2_NORMALIZATION = 11,
+  L2_POOL_2D = 12,
+  LOCAL_RESPONSE_NORMALIZATION = 13,
+  LOGISTIC = 14,
+  LSH_PROJECTION = 15,
+  LSTM = 16,
+  MAX_POOL_2D = 17,
+  MUL = 18,
+  RELU = 19,
+  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+  // since different model developers use RELU1 in different ways. Never
+  // create another op called RELU1.
+  RELU_N1_TO_1 = 20,
+  RELU6 = 21,
+  RESHAPE = 22,
+  RESIZE_BILINEAR = 23,
+  RNN = 24,
+  SOFTMAX = 25,
+  SPACE_TO_DEPTH = 26,
+  SVDF = 27,
+  TANH = 28,
+  CONCAT_EMBEDDINGS = 29,
+  SKIP_GRAM = 30,
+  CALL = 31,
+  CUSTOM = 32,
+  EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
+  SPACE_TO_BATCH_ND = 38,
+  TRANSPOSE = 39,
+  MEAN = 40,
+  SUB = 41,
+  DIV = 42,
+  SQUEEZE = 43,
+  UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+  STRIDED_SLICE = 45,
+  BIDIRECTIONAL_SEQUENCE_RNN = 46,
+  EXP = 47,
+  TOPK_V2 = 48,
+  SPLIT = 49,
+  LOG_SOFTMAX = 50,
+  // DELEGATE is a special op type for the operations which are delegated to
+  // other backends.
+  // WARNING: Experimental interface, subject to change
+  DELEGATE = 51,
+  BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+  CAST = 53,
+  PRELU = 54,
+  MAXIMUM = 55,
+  ARG_MAX = 56,
+  MINIMUM = 57,
+  LESS = 58,
+  NEG = 59,
+  PADV2 = 60,
+  GREATER = 61,
+  GREATER_EQUAL = 62,
+  LESS_EQUAL = 63,
+  SELECT = 64,
+  SLICE = 65,
+  SIN = 66,
+  TRANSPOSE_CONV = 67,
+  SPARSE_TO_DENSE = 68,
+  TILE = 69,
+  EXPAND_DIMS = 70,
+  EQUAL = 71,
+  NOT_EQUAL = 72,
+  LOG = 73,
+  SUM = 74,
+  SQRT = 75,
+  RSQRT = 76,
+  SHAPE = 77,
+  POW = 78,
+  ARG_MIN = 79,
+  FAKE_QUANT = 80,
+  REDUCE_PROD = 81,
+  REDUCE_MAX = 82,
+  PACK = 83,
+  LOGICAL_OR = 84,
+  ONE_HOT = 85,
+  LOGICAL_AND = 86,
+  LOGICAL_NOT = 87,
+  UNPACK = 88,
+  REDUCE_MIN = 89,
+  FLOOR_DIV = 90,
+  REDUCE_ANY = 91,
+  SQUARE = 92,
+  ZEROS_LIKE = 93,
+  FILL = 94,
+  FLOOR_MOD = 95,
+  RANGE = 96,
+  RESIZE_NEAREST_NEIGHBOR = 97,
+  LEAKY_RELU = 98,
+  SQUARED_DIFFERENCE = 99,
+  MIRROR_PAD = 100,
+  ABS = 101,
+  SPLIT_V = 102,
+  UNIQUE = 103,
+  CEIL = 104,
+  REVERSE_V2 = 105,
+  ADD_N = 106,
+  GATHER_ND = 107,
+  COS = 108,
+  WHERE = 109,
+  RANK = 110,
+  ELU = 111,
+  REVERSE_SEQUENCE = 112,
+  MATRIX_DIAG = 113,
+  QUANTIZE = 114,
+  MATRIX_SET_DIAG = 115,
+  ROUND = 116,
+  HARD_SWISH = 117,
+  IF = 118,
+  WHILE = 119,
+  NON_MAX_SUPPRESSION_V4 = 120,
+  NON_MAX_SUPPRESSION_V5 = 121,
+  SCATTER_ND = 122,
+  SELECT_V2 = 123,
+  DENSIFY = 124,
+  SEGMENT_SUM = 125,
+  BATCH_MATMUL = 126,
+  PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+  CUMSUM = 128,
+  CALL_ONCE = 129,
+  BROADCAST_TO = 130,
+  RFFT2D = 131,
+  CONV_3D = 132,
+  IMAG=133,
+  REAL=134,
+  COMPLEX_ABS=135,
+  HASHTABLE = 136,
+  HASHTABLE_FIND = 137,
+  HASHTABLE_IMPORT = 138,
+  HASHTABLE_SIZE = 139,
+  REDUCE_ALL = 140,
+  CONV_3D_TRANSPOSE = 141,
+  VAR_HANDLE = 142,
+  READ_VARIABLE = 143,
+  ASSIGN_VARIABLE = 144,
+  BROADCAST_ARGS = 145,
+  RANDOM_STANDARD_NORMAL = 146,
+  BUCKETIZE = 147,
+  RANDOM_UNIFORM = 148,
+  MULTINOMIAL = 149,
+  GELU = 150,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+  Conv2DOptions,
+  DepthwiseConv2DOptions,
+  ConcatEmbeddingsOptions,
+  LSHProjectionOptions,
+  Pool2DOptions,
+  SVDFOptions,
+  RNNOptions,
+  FullyConnectedOptions,
+  SoftmaxOptions,
+  ConcatenationOptions,
+  AddOptions,
+  L2NormOptions,
+  LocalResponseNormalizationOptions,
+  LSTMOptions,
+  ResizeBilinearOptions,
+  CallOptions,
+  ReshapeOptions,
+  SkipGramOptions,
+  SpaceToDepthOptions,
+  EmbeddingLookupSparseOptions,
+  MulOptions,
+  PadOptions,
+  GatherOptions,
+  BatchToSpaceNDOptions,
+  SpaceToBatchNDOptions,
+  TransposeOptions,
+  ReducerOptions,
+  SubOptions,
+  DivOptions,
+  SqueezeOptions,
+  SequenceRNNOptions,
+  StridedSliceOptions,
+  ExpOptions,
+  TopKV2Options,
+  SplitOptions,
+  LogSoftmaxOptions,
+  CastOptions,
+  DequantizeOptions,
+  MaximumMinimumOptions,
+  ArgMaxOptions,
+  LessOptions,
+  NegOptions,
+  PadV2Options,
+  GreaterOptions,
+  GreaterEqualOptions,
+  LessEqualOptions,
+  SelectOptions,
+  SliceOptions,
+  TransposeConvOptions,
+  SparseToDenseOptions,
+  TileOptions,
+  ExpandDimsOptions,
+  EqualOptions,
+  NotEqualOptions,
+  ShapeOptions,
+  PowOptions,
+  ArgMinOptions,
+  FakeQuantOptions,
+  PackOptions,
+  LogicalOrOptions,
+  OneHotOptions,
+  LogicalAndOptions,
+  LogicalNotOptions,
+  UnpackOptions,
+  FloorDivOptions,
+  SquareOptions,
+  ZerosLikeOptions,
+  FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
+  RangeOptions,
+  ResizeNearestNeighborOptions,
+  LeakyReluOptions,
+  SquaredDifferenceOptions,
+  MirrorPadOptions,
+  AbsOptions,
+  SplitVOptions,
+  UniqueOptions,
+  ReverseV2Options,
+  AddNOptions,
+  GatherNdOptions,
+  CosOptions,
+  WhereOptions,
+  RankOptions,
+  ReverseSequenceOptions,
+  MatrixDiagOptions,
+  QuantizeOptions,
+  MatrixSetDiagOptions,
+  HardSwishOptions,
+  IfOptions,
+  WhileOptions,
+  DepthToSpaceOptions,
+  NonMaxSuppressionV4Options,
+  NonMaxSuppressionV5Options,
+  ScatterNdOptions,
+  SelectV2Options,
+  DensifyOptions,
+  SegmentSumOptions,
+  BatchMatMulOptions,
+  CumsumOptions,
+  CallOnceOptions,
+  BroadcastToOptions,
+  Rfft2dOptions,
+  Conv3DOptions,
+  HashtableOptions,
+  HashtableFindOptions,
+  HashtableImportOptions,
+  HashtableSizeOptions,
+  VarHandleOptions,
+  ReadVariableOptions,
+  AssignVariableOptions,
+  RandomOptions,
+  BucketizeOptions,
+  GeluOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+  NONE = 0,
+  RELU = 1,
+  RELU_N1_TO_1 = 2,
+  RELU6 = 3,
+  TANH = 4,
+  SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+  padding:Padding;
+  stride_d:int;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_d_factor:int = 1;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  filter_width:int;
+  filter_height:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  // See comments in lite/c/builtin_op_data.h for more details.
+  depth_multiplier:int;
+  fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+  num_channels:int;
+  num_columns_per_channel:[int];
+  embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+  UNKNOWN = 0,
+  SPARSE = 1,
+  DENSE = 2,
+}
+
+table LSHProjectionOptions {
+  type: LSHProjectionType;
+}
+
+table SVDFOptions {
+  rank:int;
+  fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for non
+  // constant inputs at evaluation time.
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
+  asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+  DEFAULT = 0,
+  SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+  // Parameters for FullyConnected version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+
+  // Parameters for FullyConnected version 2 or above.
+  weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimension is preserved. Furthermore,
+  // all but the last dimension of the input and output shapes will be equal.
+  keep_num_dims: bool;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+  beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+  axis:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 3.
+  pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+  // This field is currently ignored in the L2 Norm Op.
+  fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+  radius:int;
+  bias:float;
+  alpha:float;
+  beta:float;
+}
+
+enum LSTMKernelType : byte {
+  // Full LSTM kernel which supports peephole and projection.
+  FULL = 0,
+  // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+  BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+  // Parameters for LSTM version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // Parameters for LSTM version 2 or above.
+  // Basic kernel is only supported in version 2 or above.
+  kernel_type: LSTMKernelType = FULL;
+
+  // Parameters for LSTM version 4 or above.
+  asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true then first dimension is sequence, otherwise batch.
+  time_major:bool;
+
+  // Parameter for Unidirectional Sequence LSTM version 4.
+  asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+  // Parameters supported by version 1:
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+
+  // Parameters supported by version 2:
+  // If true then first dimension is sequence, otherwise batch.
+  // Version 1 implementations assumed time_major to be true, so this default
+  // value should never change.
+  time_major: bool = true;
+
+  // Parameters for version 3 or above.
+  asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+  new_height: int (deprecated);
+  new_width: int (deprecated);
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+  // The subgraph index that needs to be called.
+  subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+  new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+  ngram_size: int;
+  max_skip_size: int;
+  include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+  block_size: int;
+}
+
+table DepthToSpaceOptions {
+  block_size: int;
+}
+
+table SubOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 5
+  pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+  SUM = 0,
+  MEAN = 1,
+  SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+  combiner:CombinerType;
+}
+
+table GatherOptions {
+  axis: int;
+  // Parameters for Gather version 5 or above.
+  batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+  keep_dims: bool;
+}
+
+table SqueezeOptions {
+  squeeze_dims:[int];
+}
+
+table SplitOptions {
+  num_splits: int;
+}
+
+table SplitVOptions {
+  num_splits: int;
+}
+
+table StridedSliceOptions {
+  begin_mask: int;
+  end_mask: int;
+  ellipsis_mask: int;
+  new_axis_mask: int;
+  shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+  in_data_type: TensorType;
+  out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+  output_type : TensorType;
+}
+
+table ArgMinOptions {
+  output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+  validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+  // Optional output type of the operation (int32 or int64). Defaults to int32.
+  out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+  // Parameters supported by version 1:
+  min:float;
+  max:float;
+  num_bits:int;
+
+  // Parameters supported by version 2:
+  narrow_range:bool;
+}
+
+table PackOptions {
+  values_count:int;
+  axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+  axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+  num:int;
+  axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+  alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+  // Doesn't include borders.
+  REFLECT = 0,
+  // Includes borders.
+  SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+  mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+  idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+  seq_dim:int;
+  batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+  then_subgraph_index:int;
+  else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+  init_subgraph_index:int;
+}
+
+table WhileOptions {
+  cond_subgraph_index:int;
+  body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+  adj_x:bool;
+  adj_y:bool;
+  // Parameters for BatchMatMul version 4 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+  exclusive:bool;
+  reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+  // The identity of hash tables. This identity will be used across different
+  // subgraphs in the same interpreter instance.
+  table_id:int;
+  key_dtype:TensorType;
+  value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+  container:string;
+  shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+table RandomOptions {
+  seed: long;
+  seed2: long;
+}
+
+table BucketizeOptions {
+  boundaries: [float];  // The bucket boundaries.
+}
+
+table GeluOptions {
+  approximate: bool;
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field has less than
+  // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  deprecated_builtin_code:byte;
+  custom_code:string;
+
+  // The version of the operator. The version need to be bumped whenever new
+  // parameters are introduced into an op.
+  version:int = 1;
+
+  // This field is introduced for resolving op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // has greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+  FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+  // Index into the operator_codes array. Using an integer here avoids
+  // complicated map lookups.
+  opcode_index:uint;
+
+  // Optional input are indicated by -1.
+  inputs:[int];
+  outputs:[int];
+
+  builtin_options:BuiltinOptions;
+  custom_options:[ubyte];
+  custom_options_format:CustomOptionsFormat;
+
+  // A list of booleans indicating the input tensors which are being mutated by
+  // this operator.(e.g. used by RNN and LSTM).
+  // For example, if the "inputs" array refers to 5 tensors and the second and
+  // fifth are mutable variables, then this list will contain
+  // [false, true, false, false, true].
+  //
+  // If the list is empty, no variable is mutated in this operator.
+  // The list either has the same length as `inputs`, or is empty.
+  mutating_variable_inputs:[bool];
+
+  // A list of indices to the subgraph's "tensors" that are internal to an Op.
+  // Internal tensors are those that do not flow in or out of the operation,
+  // but instead are part of internal computation. As such, the operation's
+  // implementation may manage its memory more efficiently. They are needed
+  // however (i.e. not just an implementation detail) since they are part of the
+  // computation, which may require relevant metadata such as quantization
+  // parameters.
+  intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+  // A list of all tensors used in this subgraph.
+  tensors:[Tensor];
+
+  // Indices of the tensors that are inputs into this subgraph. Note this is
+  // the list of non-static tensors that feed into the subgraph for inference.
+  inputs:[int];
+
+  // Indices of the tensors that are outputs out of this subgraph. Note this is
+  // the list of output tensors that are considered the product of the
+  // subgraph's inference.
+  outputs:[int];
+
+  // All operators, in execution order.
+  operators:[Operator];
+
+  // Name of this subgraph (used for debugging).
+  name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+  data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+  // A human readable string to uniquely identify a Metadata.
+  name:string;
+  // An index to the buffers table.
+  buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+  // Represents the alias to use for this tensor.
+  name:string;
+
+  // The actual tensor index in the primary graph, that 'name' corresponds to.
+  tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+  // Named inputs for this signature.
+  inputs:[TensorMap];
+
+  // Named outputs for this signature.
+  outputs:[TensorMap];
+
+  // Key value which was in the Tensorflow SavedModel SignatureDef map.
+  signature_key:string;
+
+  // Model tag, deprecated.
+  deprecated_tag:string (deprecated);
+
+  // Index of subgraphs that corresponds to the exported method.
+  subgraph_index:uint;
+}
+
+table Model {
+  // Version of the schema.
+  version:uint;
+
+  // A list of all operator codes used in this model. This is
+  // kept in order because operators carry an index into this
+  // vector.
+  operator_codes:[OperatorCode];
+
+  // All the subgraphs of the model. The 0th is assumed to be the main
+  // model.
+  subgraphs:[SubGraph];
+
+  // A description of the model.
+  description:string;
+
+  // Buffers of the model.
+  // Note the 0th entry of this array must be an empty buffer (sentinel).
+  // This is a convention so that tensors without a buffer can provide 0 as
+  // their buffer.
+  buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existing buffers list.
+  // Deprecated, prefer to use metadata field.
+  metadata_buffer:[int];
+
+  // Metadata about the model.
+  metadata:[Metadata];
+
+  // Optional SignatureDefs for the model.
+  signature_defs:[SignatureDef];
+}
+
+root_type Model;
index 9b8840fc2ebfee1468cbdbe5011c9fe6dbd6c815..9a9465072c387f76741d4ae656f2789397f5accb 100644 (file)
@@ -8,7 +8,7 @@ android {
         minSdkVersion 26
         targetSdkVersion 29
         versionCode 1
-        versionName "1.19.0"
+        versionName "1.20.0"
 
         externalNativeBuild {
             ndkBuild {
index beb279cb9cb1d8a8e08c5556f3320ab0a1c6de67..63e4fc545de62f992a0ebdf19dcc7b21a52707aa 100644 (file)
@@ -55,7 +55,7 @@ target_link_libraries(android_benchmark_native nnfw_lib_tflite)
 target_link_libraries(android_benchmark_native nnfw_lib_misc)
 target_link_libraries(android_benchmark_native log)
 
-nnas_find_package(FlatBuffersSource EXACT 1.12 REQUIRED)
+nnas_find_package(FlatBuffersSource EXACT 2.0 REQUIRED)
 target_include_directories(android_benchmark_native PUBLIC ${FlatBuffersSource_DIR}/include .)
 
 add_custom_target(android-benchmark-apk ALL
index b238b1f893de23a381f21c4756fe7725bbd0ba0d..beb243a4d3a56eef8f3f4065259b2b9f61cee0b0 100644 (file)
@@ -11,6 +11,7 @@ set(NNFW_API_HEADERS include/nnfw.h include/nnfw_experimental.h)
 target_link_libraries(${ONERT_DEV} PUBLIC nnfw-nnapi-header)
 target_link_libraries(${ONERT_DEV} PRIVATE onert_core)
 target_link_libraries(${ONERT_DEV} PRIVATE jsoncpp tflite_loader circle_loader ${LIB_PTHREAD})
+target_link_libraries(${ONERT_DEV} PRIVATE trix_loader)
 target_link_libraries(${ONERT_DEV} PRIVATE nnfw_common)
 target_link_libraries(${ONERT_DEV} PRIVATE nnfw_coverage)
 # NOTE Below line is added to remove warning for android build
index 6eb7e6ba915801317e27d158d9138c84c5eee9ff..6f296a93167f81302b6babea829fa7daa0201445 100644 (file)
@@ -92,6 +92,15 @@ typedef enum
    */
   NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6,
 
+  /**
+   * A tensor of 16 bit signed integers that represent real numbers.
+   *
+   * real_value = (integer_value - zeroPoint) * scale.
+   *
+   * Forced to have zeroPoint equal to 0.
+   */
+  NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7,
+
 } NNFW_TYPE;
 
 /**
index b885a6b9049549e44665a132699ea2291a9e8293..45b34716a4cd0145d55170916e1a32c0a1ccaf10 100644 (file)
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01001300
+#define NNFW_VERSION 0x01001400
 
 #endif // __NNFW_VERSION_H__
index b69dd83e4afe23ef708d5f784097470ed25cfe2d..0ebd385e9dbd06adc5fa0b5e89d09edc0a56c55c 100644 (file)
@@ -28,6 +28,7 @@ STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_BOOL, 3);
 STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_UINT8, 4);
 STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_INT64, 5);
 STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED, 6);
+STATIC_ASSERT_ENUM_CHECK(NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED, 7);
 
 STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_NO_ERROR, 0);
 STATIC_ASSERT_ENUM_CHECK(NNFW_STATUS_ERROR, 1);
index 1a3aaf9e994d42c4f644f7b34847172c5370df10..62a0439217ffe0fcb4fd543be41d137afbbe3468 100644 (file)
@@ -23,6 +23,7 @@
 #include "exec/Execution.h"
 #include "circle_loader.h"
 #include "tflite_loader.h"
+#include "trix_loader.h"
 #include "json/json.h"
 #include "ir/OpCode.h"
 #include "util/TracingCtx.h"
@@ -155,6 +156,45 @@ void setConfigKeyValues(const CfgKeyValues &keyValues)
   onert::util::config_source_ext(std::move(configsrc));
 }
 
+NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
+{
+  using onert::ir::DataType;
+  switch (dt)
+  {
+    case DataType::FLOAT32:
+      return NNFW_TYPE_TENSOR_FLOAT32;
+    case DataType::INT32:
+      return NNFW_TYPE_TENSOR_INT32;
+    case DataType::QUANT_UINT8_ASYMM:
+      return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
+    case DataType::BOOL8:
+      return NNFW_TYPE_TENSOR_BOOL;
+    case DataType::UINT8:
+      return NNFW_TYPE_TENSOR_UINT8;
+    case DataType::INT64:
+      return NNFW_TYPE_TENSOR_INT64;
+    case DataType::QUANT_INT8_ASYMM:
+      return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
+    case DataType::QUANT_INT16_SYMM:
+      return NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED;
+    case DataType::UINT32:
+    case DataType::QUANT_INT8_SYMM:
+    default:
+      throw std::runtime_error("Error: Model has type that runtime API does not support.");
+  }
+}
+
+void fillTensorInfo(nnfw_tensorinfo *ti, const onert::ir::Shape &shape,
+                    const onert::ir::DataType &dtype)
+{
+  ti->rank = shape.rank();
+  for (int j = 0; j < ti->rank; ++j)
+  {
+    ti->dims[j] = shape.dim(j);
+  }
+  ti->dtype = datatype_to_nnfw_dtype(dtype);
+}
+
 } // namespace
 
 nnfw_session::nnfw_session()
@@ -225,6 +265,10 @@ NNFW_STATUS nnfw_session::load_model_from_modelfile(const char *model_file_path)
     {
       _subgraphs = onert::circle_loader::loadModel(filename.c_str());
     }
+    else if (model_type == ".tvn")
+    {
+      _subgraphs = onert::trix_loader::loadModel(filename.c_str());
+    }
     else
     {
       std::cerr << "Unsupported model type" << std::endl;
@@ -307,6 +351,10 @@ NNFW_STATUS nnfw_session::load_model_from_nnpackage(const char *package_dir)
     {
       _subgraphs = onert::circle_loader::loadModel(model_file_path);
     }
+    else if (model_type == "tvn")
+    {
+      _subgraphs = onert::trix_loader::loadModel(model_file_path);
+    }
     else
     {
       std::cerr << "Unsupported model type in MANIFEST" << std::endl;
@@ -657,32 +705,6 @@ NNFW_STATUS nnfw_session::set_output_layout(uint32_t index, NNFW_LAYOUT layout)
   return NNFW_STATUS_NO_ERROR;
 }
 
-static NNFW_TYPE datatype_to_nnfw_dtype(onert::ir::DataType dt)
-{
-  using onert::ir::DataType;
-  switch (dt)
-  {
-    case DataType::FLOAT32:
-      return NNFW_TYPE_TENSOR_FLOAT32;
-    case DataType::INT32:
-      return NNFW_TYPE_TENSOR_INT32;
-    case DataType::QUANT_UINT8_ASYMM:
-      return NNFW_TYPE_TENSOR_QUANT8_ASYMM;
-    case DataType::BOOL8:
-      return NNFW_TYPE_TENSOR_BOOL;
-    case DataType::UINT8:
-      return NNFW_TYPE_TENSOR_UINT8;
-    case DataType::INT64:
-      return NNFW_TYPE_TENSOR_INT64;
-    case DataType::QUANT_INT8_ASYMM:
-      return NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED;
-    case DataType::UINT32:
-    case DataType::QUANT_INT8_SYMM:
-    default:
-      throw std::runtime_error("Error: Model has type that runtime API does not support.");
-  }
-}
-
 NNFW_STATUS nnfw_session::apply_tensorinfo(uint32_t index, nnfw_tensorinfo ti)
 {
   // sanity check
@@ -769,22 +791,11 @@ NNFW_STATUS nnfw_session::input_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
     auto shape = primary_subgraph()->operands().at(opidx).shape();
     if (isStatePreparedOrFinishedRun())
     {
-      if (_execution)
-      {
-        shape = _execution->getInputShape(onert::ir::IOIndex{index});
-      }
-      else
-      {
-        shape = _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
-      }
-    }
-
-    ti->rank = shape.rank();
-    for (int j = 0; j < ti->rank; ++j)
-    {
-      ti->dims[j] = shape.dim(j);
+      shape = _execution ? _execution->getInputShape(onert::ir::IOIndex{index})
+                         : _executions.at(0)->getInputShape(onert::ir::IOIndex{index});
     }
-    ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type());
+    auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
+    fillTensorInfo(ti, shape, dtype);
   }
   catch (const std::exception &e)
   {
@@ -820,21 +831,12 @@ NNFW_STATUS nnfw_session::output_tensorinfo(uint32_t index, nnfw_tensorinfo *ti)
     // If it is called after `nnfw_run` then get the shape from Execution, not from the graph
     if (isStateFinishedRun())
     {
-      if (_execution)
-      {
-        shape = _execution->getOutputShape(onert::ir::IOIndex{index});
-      }
-      else
-      {
-        shape = _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
-      }
-    }
-    ti->rank = shape.rank();
-    for (int j = 0; j < ti->rank; ++j)
-    {
-      ti->dims[j] = shape.dim(j);
+      shape = _execution
+                ? _execution->getOutputShape(onert::ir::IOIndex{index})
+                : _executions.at(_executions.size() - 1)->getOutputShape(onert::ir::IOIndex{index});
     }
-    ti->dtype = datatype_to_nnfw_dtype(primary_subgraph()->operands().at(opidx).typeInfo().type());
+    auto dtype = primary_subgraph()->operands().at(opidx).typeInfo().type();
+    fillTensorInfo(ti, shape, dtype);
   }
   catch (const std::exception &e)
   {
index 4b21e0ace453d3fab696215de2404af6a03ddec7..c43160ba7be9259810fd9aa314a5e68a13f7a937 100644 (file)
@@ -1,9 +1,14 @@
+# Backend common libs
 set(LIB_ONERT_BACKEND_ACL_COMMON onert_backend_acl_common)
+set(LIB_ONERT_BACKEND_CL_COMMON onert_backend_cl_common)
+add_subdirectory(cl_common)
+add_subdirectory(acl_common)
 
+# Backends
 add_subdirectory(cpu)
 add_subdirectory(acl_cl)
 add_subdirectory(acl_neon)
-add_subdirectory(acl_common)
 add_subdirectory(ruy)
 add_subdirectory(gpu_cl)
 add_subdirectory(xnnpack)
+add_subdirectory(trix)
diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc
deleted file mode 100644 (file)
index 5595043..0000000
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendContext.h"
-
-#include "TensorBuilder.h"
-#include "KernelGenerator.h"
-#include "Optimizer.h"
-#include "util/logging.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_cl
-{
-
-void BackendContext::initConsts()
-{
-  _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
-    constant_initializer->setLayout(graph()->layout());
-    op.accept(*constant_initializer);
-  });
-
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
-    if (_data.external_operands.contains(ind) || !operand.isConstant())
-      return;
-    const auto &obj = graph()->operands().at(ind);
-    if (obj.isConstant() && !constant_initializer->exist(ind))
-    {
-      constant_initializer->registerDefaultInitializer(ind, obj);
-    }
-  });
-
-  constant_initializer->run();
-}
-
-void BackendContext::planTensors()
-{
-  ir::OperandIndexMap<uint32_t> uses_map;
-  ir::OperandIndexMap<uint32_t> def_map;
-  ir::OperandIndexSequence constants;
-
-  // Prepare scanning
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (_data.external_operands.contains(ind))
-      return;
-
-    uses_map[ind] = obj.getUses().size();
-    def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
-    if (obj.isConstant())
-      constants.append(ind);
-
-    if (!tensor_builder->isRegistered(ind))
-    {
-      // These tensors do not exist in any operation (No use and def)
-      const auto info = obj.info();
-      const auto layout = _data.operand_layouts.at(ind);
-      // TODO Change tensor info to have permuted shape
-      tensor_builder->registerTensorInfo(ind, info, layout);
-    }
-  });
-
-  // Start scanning to do notify{First|Last}Use for each tensor
-
-  // If a tensor is a constant, increase the use of the tensor and allocate it first.
-  // Increasing use count here makes the tensor never be deallocated, i.e it they will be
-  // deallocated last.
-  VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
-  for (const auto &ind : constants)
-  {
-    uses_map[ind]++;
-    tensor_builder->notifyFirstUse(ind);
-  }
-
-  // At each operation,
-  // 1. Scan DEF of outputs. If the DEF, allocate it
-  // 2. Scan DEF of inputs. If variable tensor, allocate it
-  // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
-  for (const auto op_ind : _data.op_order)
-  {
-    const auto &op = graph()->operations().at(op_ind);
-    auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-    auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-
-    // Define outputs
-    for (const auto &ind : op_outputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      assert(def_map.find(ind) != def_map.end());
-      if (def_map[ind])
-      {
-        def_map[ind] = 0;
-        tensor_builder->notifyFirstUse(ind);
-      }
-    }
-
-    // Scan variable tensors
-    // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
-    // non-constant because of less memory usage by memory planning in here
-    for (const auto &ind : op_inputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      const auto &operand = graph()->operands().at(ind);
-      if (operand.info().isVariable())
-      {
-        // The variable tensor with buffer is not supported yet
-        assert(operand.data() == nullptr);
-        assert(operand.getUses().size() == 1 && !operand.getDef().valid());
-        assert(uses_map[ind] == 1 && def_map[ind] == 0);
-        tensor_builder->notifyFirstUse(ind);
-      }
-    }
-
-    for (const auto &ind : op_inputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      assert(uses_map.find(ind) != uses_map.end());
-      assert(uses_map[ind] > 0);
-      uses_map[ind]--;
-      if (uses_map[ind] == 0)
-      {
-        // plan for deallocation of static tensornode
-        tensor_builder->notifyLastUse(ind);
-      }
-    }
-  }
-
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
-    if (uses_map[ind] == 0)
-    {
-      tensor_builder->notifyLastUse(ind);
-    }
-  });
-
-  // Dispose and validate
-  for (const auto &ind : constants)
-  {
-    --uses_map[ind];
-    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
-    {
-      tensor_builder->notifyLastUse(ind);
-    }
-  }
-
-  assert(
-    std::all_of(uses_map.begin(), uses_map.end(),
-                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
-  assert(
-    std::all_of(def_map.begin(), def_map.end(),
-                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-ITensorRegistry *BackendContext::genTensors()
-{
-  optimizer->optimize();
-
-  graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (external_operands().contains(ind))
-      return;
-
-    const auto frontend_layout = graph()->layout();
-    const auto backend_layout = operand_layouts().at(ind);
-    ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
-                                 obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
-    tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
-  });
-
-  // TODO Get compiler options from compiler, and use it rather than getting it from Env
-  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
-  {
-    planTensors();
-  }
-  else
-  {
-    // For the executors that does not have fixed linear execution order:
-    // To make tensors never be deallocated, this is a workaround to use static memory planner
-    graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
-      if (tensor_builder->isRegistered(ind))
-        tensor_builder->notifyFirstUse(ind);
-    });
-  }
-
-  tensor_builder->prepare();
-
-  return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels()
-{
-  FunctionMap ret;
-
-  for (auto op_ind : _data.op_order)
-  {
-    auto fn_seq = kernel_gen->generate(op_ind);
-    ret.emplace_back(op_ind, std::move(fn_seq));
-  }
-
-  tensor_builder->allocate();
-  initConsts();
-
-  // NOTE For memory optimization, we want to free some operand data
-  const_cast<ir::Graph &>(*_data.graph)
-    .operands()
-    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
-  for (auto &it : ret)
-  {
-    auto &fn_seq = it.second;
-    fn_seq->iterate([&](exec::IFunction &ifunc) {
-      ifunc.prepare();
-      tensor_builder->postFunctionPrepare();
-    });
-  }
-
-  return ret;
-}
-
-} // namespace acl_cl
-} // namespace backend
-} // namespace onert
index 2638046cae71dbb8e0a35b01f162a164471ad3d8..5da9158250eb2b2f18067d266fdcb9713a1d3909 100644 (file)
 #ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
 #define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__
 
-#include <backend/BackendContext.h>
-#include "TensorBuilder.h"
+#include <AclBackendContext.h>
+
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
+#include "TensorBuilder.h"
 
 namespace onert
 {
@@ -31,33 +32,8 @@ namespace acl_cl
 
 class Optimizer;
 
-class BackendContext : public onert::backend::BackendContext
-{
-public:
-  BackendContext(const Backend *backend, ContextData &&data,
-                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
-                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
-                 std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
-                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
-    : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
-      tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
-                                                                                    kernel_gen}
-  {
-  }
-
-  ITensorRegistry *genTensors() override;
-  FunctionMap genKernels() override;
-
-private:
-  void initConsts();
-  void planTensors();
-
-public:
-  std::shared_ptr<TensorBuilder> tensor_builder;
-  std::shared_ptr<ConstantInitializer> constant_initializer;
-  std::shared_ptr<KernelGenerator> kernel_gen;
-  std::shared_ptr<Optimizer> optimizer;
-};
+using BackendContext =
+  acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>;
 
 } // namespace acl_cl
 } // namespace backend
index 54b2a7a088256950f3891bfcb88aa4619ebab38c..0431bb1986df68cc629224bd15dd9d24763bdd2a 100644 (file)
@@ -58,21 +58,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
 
   if (block_size_obj.isConstant())
   {
-    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
-      assert(model_obj.data());
-      const auto &shape = model_obj.shape();
-      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
-      assert(model_obj.shape().rank() == 1);
-      obj.access([&](ITensor &tensor) {
-        for (size_t i = 0; i < shape.num_elements(); ++i)
-        {
-          const int32_t value = base[shape.num_elements() - i - 1];
-          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
-                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
-          *into = value;
-        }
-      });
-    };
+    _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>;
   }
 
   const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
index 12e805ee5c1edc5fd7e9700140c2f697e3d85090..a9ce888ee0cade8c42889ebb001c1c7146b22ee3 100644 (file)
 
 #include "Optimizer.h"
 
-#include "ParentInfo.h"
+#include <AclSubTensorAnalyzer.h>
 
-#include <cassert>
 #include <compiler/LoweredGraph.h>
 #include <util/logging.h>
-#include "AclSubTensorAnalyzer.h"
+
+#include <cassert>
 
 namespace onert
 {
diff --git a/runtime/onert/backend/acl_common/AclBackendContext.h b/runtime/onert/backend/acl_common/AclBackendContext.h
new file mode 100644 (file)
index 0000000..b8d0274
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <ir/Index.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandIndexSequence.h>
+#include <util/logging.h>
+
+#include <cl_common/BackendContext.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace acl_common
+{
+
+// TODO Find better way to handle common code (reduce template)
+template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator,
+          typename T_Optimizer>
+class AclBackendContext
+  : public onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer,
+                                                     T_KernelGenerator>
+{
+public:
+  AclBackendContext(const Backend *backend, ContextData &&data,
+                    std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                    std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr,
+                    std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr,
+                    std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer,
+                                                T_KernelGenerator>(
+        backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
+  {
+    // DO NOTHING
+  }
+
+  ITensorRegistry *genTensors() override
+  {
+    optimizer->optimize();
+
+    this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+      if (this->external_operands().contains(ind))
+        return;
+
+      const auto frontend_layout = this->graph()->layout();
+      const auto backend_layout = this->operand_layouts().at(ind);
+      ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
+                                   obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
+      this->tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+    });
+
+    // TODO Get compiler options from compiler, and use it rather than getting it from Env
+    if (util::getConfigString(util::config::EXECUTOR) == "Linear")
+    {
+      this->planTensors();
+    }
+    else
+    {
+      // For the executors that does not have fixed linear execution order:
+      // To make tensors never be deallocated, this is a workaround to use static memory planner
+      this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+        if (this->tensor_builder->isRegistered(ind))
+          this->tensor_builder->notifyFirstUse(ind);
+      });
+    }
+
+    this->tensor_builder->prepare();
+
+    return this->tensor_registry.get();
+  }
+
+protected:
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout) override
+  {
+    this->tensor_builder->registerTensorInfo(ind, info, backend_layout);
+  }
+
+public:
+  // TODO Make it private
+  std::shared_ptr<T_Optimizer> optimizer;
+};
+
+} // namespace acl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__
index b7f66b50ecdfeae5a5fcd091630ac1cd6b429d97..65659ad50d44cb2b719e0604acb475f94f540577 100644 (file)
@@ -153,6 +153,23 @@ void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &o
   Init<T>(model_obj, obj, copy, frontend_layout);
 }
 
+// Pre-defined initializer - fill reverse order
+template <typename T> void initReverseOrder(const ir::Operand &model_obj, backend::ITensor &obj)
+{
+  assert(model_obj.data());
+  const auto &shape = model_obj.shape();
+  const auto base = reinterpret_cast<const T *>(model_obj.data()->base());
+  assert(model_obj.shape().rank() == 1);
+  obj.access([&](ITensor &tensor) {
+    for (size_t i = 0; i < shape.num_elements(); ++i)
+    {
+      const T value = base[shape.num_elements() - i - 1];
+      T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)}));
+      *into = value;
+    }
+  });
+}
+
 class AclConstantInitializer : public ir::OperationVisitor
 {
 public:
index 60f4ebf7eeb7868b251f5f32f280d2bc6335fbc2..a0bbe7c3c3035039b4c1ea097e9f1b6c4f644f58 100644 (file)
 #ifndef __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
 #define __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__
 
+#include <cl_common/ParentInfo.h>
+
 #include <ir/OperationVisitor.h>
 #include <ir/Graph.h>
-#include "ParentInfo.h"
 
 namespace onert
 {
@@ -94,21 +95,21 @@ public:
       }
       coordinate_info.set(axis, axis_point);
 
-      _parent_map.emplace(
-        input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
+      _parent_map.emplace(input_index,
+                          cl_common::ParentInfo{output_index, _current_op_layout, coordinate_info});
 
       axis_point += input_shape.dim(axis);
     }
   }
 
-  std::unordered_map<ir::OperandIndex, ParentInfo> &&releaseParentMap()
+  std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&releaseParentMap()
   {
     return std::move(_parent_map);
   }
 
 private:
   const ir::Graph &_graph;
-  std::unordered_map<ir::OperandIndex, ParentInfo> _parent_map;
+  std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> _parent_map;
   ir::Layout _current_op_layout{ir::Layout::UNKNOWN};
   bool usePadding{false};
 };
index 7c1c5dd9a2540c29aa1e556ad34fd02aad9dd622..e008fd6f5cdf38c523116d3313069b877caff628 100644 (file)
 #ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
 #define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
 
-#include <memory>
-#include <queue>
-
-#include <arm_compute/core/Types.h>
-#include "ir/OperandIndexMap.h"
-#include <ir/Operands.h>
 #include "AclTensorManager.h"
 #include "AclTensorRegistry.h"
-#include <memory>
-#include "ParentInfo.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/OperandIndexMap.h>
+#include <ir/Operands.h>
 #include <util/Utils.h>
 
+#include <arm_compute/core/Types.h>
+
+#include <memory>
+#include <queue>
+
 namespace onert
 {
 namespace backend
@@ -36,16 +39,12 @@ namespace backend
 namespace acl_common
 {
 
-enum class UsesType
-{
-  FIRST,
-  LAST
-};
-
 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder
 {
 public:
   using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>;
+  // TODO Remove this alias and direct usage of this type
+  using UsesType = cl_common::UsesType;
 
   AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr);
 
@@ -76,7 +75,7 @@ public:
     _uses_count_map[index] = num_uses;
   }
 
-  void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
+  void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
   {
     _parent_map = std::move(parent_map);
   }
@@ -104,10 +103,10 @@ private:
   std::unique_ptr<T_AclTensorManager> _tensor_mgr;
 
   // for linear executor
-  std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
+  cl_common::LifetimeSeq _lifetime_seq;
 
   // Extra info for concat elimination
-  ir::OperandIndexMap<ParentInfo> _parent_map;
+  ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
 };
 
 } // namespace acl_common
@@ -217,55 +216,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void)
 template <typename T_ITensor, typename T_Tensor, typename T_SubTensor>
 void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void)
 {
-  // Update lifetime sequence to apply subtensor optimization
-
-  std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
-  std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
-    [&](ir::OperandIndex ind) -> ir::OperandIndex & {
-    ir::OperandIndex &ret = root_map[ind];
-
-    // We know the root parent value already
-    if (ret.valid())
-      return ret;
-
-    auto itr = _parent_map.find(ind);
-    if (itr == _parent_map.end())
-    {
-      // If there is no parent, let's store the value of itself
-      return ret = ind;
-    }
-    else
-    {
-      return ret = find_root(itr->second.parent);
-    }
-  };
-
-  ir::OperandIndexMap<bool> first_use_check;
-  ir::OperandIndexMap<bool> last_use_check;
-  std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
-  for (size_t i = 0; i < _lifetime_seq.size(); i++)
-  {
-    auto &entry = _lifetime_seq[i];
-    if (entry.first != UsesType::FIRST)
-      continue;
-    auto root_ind = find_root(entry.second);
-    if (first_use_check[root_ind])
-      continue;
-    first_use_check[root_ind] = true;
-    lifetime_map[i] = {UsesType::FIRST, root_ind};
-  }
-
-  for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
-  {
-    auto &entry = _lifetime_seq[i];
-    if (entry.first != UsesType::LAST)
-      continue;
-    auto root_ind = find_root(entry.second);
-    if (last_use_check[root_ind])
-      continue;
-    last_use_check[root_ind] = true;
-    lifetime_map[i] = {UsesType::LAST, root_ind};
-  }
+  auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
 
   for (auto &entry : lifetime_map)
   {
index d3ae5acf705e780c8d9e3e81007ab2047409c20d..8d409a47c912217765cd114585d647a2f784e0b8 100644 (file)
@@ -12,6 +12,7 @@ target_include_directories(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURREN
 target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC onert_core)
 target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex)
 target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc)
+target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${LIB_ONERT_BACKEND_CL_COMMON})
 target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_common)
 target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage)
 
diff --git a/runtime/onert/backend/acl_common/ParentInfo.h b/runtime/onert/backend/acl_common/ParentInfo.h
deleted file mode 100644 (file)
index 7084363..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
-#define __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
-
-#include <ir/Index.h>
-#include <ir/Coordinates.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_common
-{
-
-/**
- * @brief      Struct to represent parent operand in child operand
- */
-struct ParentInfo
-{
-  ir::OperandIndex parent;
-  ir::Layout frontend_layout;
-  ir::Coordinates coordinates;
-};
-
-} // namespace acl_common
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc
deleted file mode 100644 (file)
index 4de3de0..0000000
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "BackendContext.h"
-
-#include "TensorBuilder.h"
-#include "KernelGenerator.h"
-#include "Optimizer.h"
-#include "util/logging.h"
-#include "ir/Index.h"
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandIndexSequence.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace acl_neon
-{
-
-void BackendContext::initConsts()
-{
-  _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
-    constant_initializer->setLayout(graph()->layout());
-    op.accept(*constant_initializer);
-  });
-
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
-    if (_data.external_operands.contains(ind) || !operand.isConstant())
-      return;
-    const auto &obj = graph()->operands().at(ind);
-    if (obj.isConstant() && !constant_initializer->exist(ind))
-    {
-      constant_initializer->registerDefaultInitializer(ind, obj);
-    }
-  });
-
-  constant_initializer->run();
-}
-
-void BackendContext::planTensors()
-{
-  ir::OperandIndexMap<uint32_t> uses_map;
-  ir::OperandIndexMap<uint32_t> def_map;
-  ir::OperandIndexSequence constants;
-
-  // Prepare scanning
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (_data.external_operands.contains(ind))
-      return;
-
-    uses_map[ind] = obj.getUses().size();
-    def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
-    if (obj.isConstant())
-      constants.append(ind);
-
-    if (!tensor_builder->isRegistered(ind))
-    {
-      // These tensors do not exist in any operation (No use and def)
-      const auto info = obj.info();
-      const auto layout = _data.operand_layouts.at(ind);
-      // TODO Change tensor info to have permuted shape
-      tensor_builder->registerTensorInfo(ind, info, layout);
-    }
-  });
-
-  // Start scanning to do notify{First|Last}Use for each tensor
-
-  // If a tensor is a constant, increase the use of the tensor and allocate it first.
-  // Increasing use count here makes the tensor never be deallocated, i.e it they will be
-  // deallocated last.
-  VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
-  for (const auto &ind : constants)
-  {
-    uses_map[ind]++;
-    tensor_builder->notifyFirstUse(ind);
-  }
-
-  // At each operation,
-  // 1. Scan DEF of outputs. If the DEF, allocate it
-  // 2. Scan DEF of inputs. If variable tensor, allocate it
-  // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
-  for (const auto op_ind : _data.op_order)
-  {
-    auto op_inputs =
-      graph()->operations().at(op_ind).getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-    auto op_outputs = graph()->operations().at(op_ind).getOutputs() | ir::Remove::DUPLICATED |
-                      ir::Remove::UNDEFINED;
-
-    // Define outputs
-    for (const auto &ind : op_outputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      assert(def_map.find(ind) != def_map.end());
-      if (def_map[ind])
-      {
-        def_map[ind] = 0;
-        tensor_builder->notifyFirstUse(ind);
-      }
-    }
-
-    // Scan variable tensors
-    // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
-    // non-constant because of less memory usage by memory planning in here
-    for (const auto &ind : op_inputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      const auto &operand = graph()->operands().at(ind);
-      if (operand.info().isVariable())
-      {
-        // The variable tensor with buffer is not supported yet
-        assert(operand.data() == nullptr);
-        assert(operand.getUses().size() == 1 && !operand.getDef().valid());
-        assert(uses_map[ind] == 1 && def_map[ind] == 0);
-        tensor_builder->notifyFirstUse(ind);
-      }
-    }
-
-    for (const auto &ind : op_inputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      assert(uses_map.find(ind) != uses_map.end());
-      assert(uses_map[ind] > 0);
-      uses_map[ind]--;
-      if (uses_map[ind] == 0)
-      {
-        // plan for deallocation of static tensornode
-        tensor_builder->notifyLastUse(ind);
-      }
-    }
-  }
-
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
-    if (uses_map[ind] == 0)
-    {
-      tensor_builder->notifyLastUse(ind);
-    }
-  });
-
-  // Dispose and validate
-  for (const auto &ind : constants)
-  {
-    --uses_map[ind];
-    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
-    {
-      tensor_builder->notifyLastUse(ind);
-    }
-  }
-
-  assert(
-    std::all_of(uses_map.begin(), uses_map.end(),
-                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
-  assert(
-    std::all_of(def_map.begin(), def_map.end(),
-                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-ITensorRegistry *BackendContext::genTensors()
-{
-  optimizer->optimize();
-
-  graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (external_operands().contains(ind))
-      return;
-
-    const auto frontend_layout = graph()->layout();
-    const auto backend_layout = operand_layouts().at(ind);
-    ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
-                                 obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
-    tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
-  });
-
-  // TODO Get compiler options from compiler, and use it rather than getting it from Env
-  if (util::getConfigString(util::config::EXECUTOR) == "Linear")
-  {
-    planTensors();
-  }
-  else
-  {
-    // For the executors that does not have fixed linear execution order:
-    // To make tensors never be deallocated, this is a workaround to use static memory planner
-    graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
-      if (tensor_builder->isRegistered(ind))
-        tensor_builder->notifyFirstUse(ind);
-    });
-  }
-
-  tensor_builder->prepare();
-
-  return tensor_registry.get();
-}
-
-FunctionMap BackendContext::genKernels()
-{
-  FunctionMap ret;
-
-  for (auto op_ind : _data.op_order)
-  {
-    auto fn_seq = kernel_gen->generate(op_ind);
-    ret.emplace_back(op_ind, std::move(fn_seq));
-  }
-
-  tensor_builder->allocate();
-  initConsts();
-
-  // NOTE For memory optimization, we want to free some operand data
-  const_cast<ir::Graph &>(*_data.graph)
-    .operands()
-    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
-  for (auto &it : ret)
-  {
-    auto &fn_seq = it.second;
-    fn_seq->iterate([&](exec::IFunction &ifunc) {
-      ifunc.prepare();
-      tensor_builder->postFunctionPrepare();
-    });
-  }
-
-  return ret;
-}
-
-} // namespace acl_neon
-} // namespace backend
-} // namespace onert
index 35d777f7bd544d10f360118d5899dcae7faf0655..b73dd188ee82e8942ea0a7ace649d718149e4681 100644 (file)
 #ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
 #define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__
 
-#include <backend/BackendContext.h>
-#include "TensorBuilder.h"
+#include <AclBackendContext.h>
+
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
+#include "TensorBuilder.h"
 
 namespace onert
 {
@@ -31,34 +32,8 @@ namespace acl_neon
 
 class Optimizer;
 
-class BackendContext : public onert::backend::BackendContext
-{
-public:
-  BackendContext(const Backend *backend, ContextData &&data,
-                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
-                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
-                 std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
-                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
-    : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
-      tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
-                                                                                    kernel_gen}
-  {
-  }
-
-  ITensorRegistry *genTensors() override;
-  FunctionMap genKernels() override;
-
-private:
-  void initConsts();
-  void planTensors();
-
-public:
-  // TODO Make it private
-  std::shared_ptr<TensorBuilder> tensor_builder;
-  std::shared_ptr<ConstantInitializer> constant_initializer;
-  std::shared_ptr<KernelGenerator> kernel_gen;
-  std::shared_ptr<Optimizer> optimizer;
-};
+using BackendContext =
+  acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>;
 
 } // namespace acl_neon
 } // namespace backend
index 35da7c9524a26137cd917a4fb2f4a86e70c4881b..1bd702756da32350ce9c628be4d430dde64d1df6 100644 (file)
@@ -37,21 +37,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node)
 
   if (block_size_obj.isConstant())
   {
-    _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) {
-      assert(model_obj.data());
-      const auto &shape = model_obj.shape();
-      const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base());
-      assert(model_obj.shape().rank() == 1);
-      obj.access([&](ITensor &tensor) {
-        for (size_t i = 0; i < shape.num_elements(); ++i)
-        {
-          const int32_t value = base[shape.num_elements() - i - 1];
-          int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() +
-                                                      tensor.calcOffset({static_cast<int32_t>(i)}));
-          *into = value;
-        }
-      });
-    };
+    _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>;
   }
 
   const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS);
index 781103f9c41175a47d10f97bc87d9ac0bf65c6af..283edd174b42ef5037bc90282cbc18249f54e23e 100644 (file)
 
 #include "Optimizer.h"
 
-#include "ParentInfo.h"
+#include <AclSubTensorAnalyzer.h>
 
-#include <cassert>
 #include <compiler/LoweredGraph.h>
 #include <util/logging.h>
-#include "AclSubTensorAnalyzer.h"
+
+#include <cassert>
 
 namespace onert
 {
diff --git a/runtime/onert/backend/cl_common/CMakeLists.txt b/runtime/onert/backend/cl_common/CMakeLists.txt
new file mode 100644 (file)
index 0000000..c751296
--- /dev/null
@@ -0,0 +1,7 @@
+file(GLOB_RECURSE SOURCES "src/*.cc")
+
+add_library(${LIB_ONERT_BACKEND_CL_COMMON} STATIC ${SOURCES})
+
+target_include_directories(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+set_target_properties(${LIB_ONERT_BACKEND_CL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC onert_core)
diff --git a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h
new file mode 100644 (file)
index 0000000..7bb72d7
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include <ir/Index.h>
+#include <ir/OperandIndexMap.h>
+#include <ir/OperandIndexSequence.h>
+#include <util/logging.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+// TODO Find better way to handle common code (reduce template)
+template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator>
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+  BackendContext(const Backend *backend, ContextData &&data,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr,
+                 std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr,
+                 std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+      tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
+                                                                                    kernel_gen}
+  {
+  }
+
+  FunctionMap genKernels() override
+  {
+    FunctionMap ret;
+
+    // kernel_gen
+    for (auto op_ind : _data.op_order)
+    {
+      auto fn_seq = kernel_gen->generate(op_ind);
+      ret.emplace_back(op_ind, std::move(fn_seq));
+    }
+
+    tensor_builder->allocate();
+    initConsts();
+
+    // NOTE For memory optimization, we want to free some operand data
+    const_cast<ir::Graph &>(*_data.graph)
+      .operands()
+      .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+    for (auto &it : ret)
+    {
+      auto &fn_seq = it.second;
+      fn_seq->iterate([&](exec::IFunction &ifunc) {
+        ifunc.prepare();
+        tensor_builder->postFunctionPrepare();
+      });
+    }
+
+    return ret;
+  }
+
+protected:
+  void initConsts()
+  {
+    _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
+      constant_initializer->setLayout(graph()->layout());
+      op.accept(*constant_initializer);
+    });
+
+    _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
+      if (_data.external_operands.contains(ind) || !operand.isConstant())
+        return;
+      const auto &obj = graph()->operands().at(ind);
+      if (obj.isConstant() && !constant_initializer->exist(ind))
+      {
+        constant_initializer->registerDefaultInitializer(ind, obj);
+      }
+    });
+
+    constant_initializer->run();
+  }
+
+  virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                  ir::Layout backend_layout) = 0;
+
+  void planTensors()
+  {
+    ir::OperandIndexMap<uint32_t> uses_map;
+    ir::OperandIndexMap<uint32_t> def_map;
+    ir::OperandIndexSequence constants;
+
+    // Prepare scanning
+    _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
+      if (_data.external_operands.contains(ind))
+        return;
+
+      uses_map[ind] = obj.getUses().size();
+      def_map[ind] = obj.getDef().valid() ? 1 : 0;
+
+      if (obj.isConstant())
+        constants.append(ind);
+
+      if (!tensor_builder->isRegistered(ind))
+      {
+        // These tensors do not exist in any operation (No use and def)
+        const auto info = obj.info();
+        const auto layout = _data.operand_layouts.at(ind);
+        // TODO Change tensor info to have permuted shape
+        registerTensorInfo(ind, info, layout);
+      }
+    });
+
+    // Start scanning to do notify{First|Last}Use for each tensor
+
+    // If a tensor is a constant, increase the use of the tensor and allocate it first.
+    // Increasing use count here makes the tensor never be deallocated, i.e it they will be
+    // deallocated last.
+    VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
+    for (const auto &ind : constants)
+    {
+      uses_map[ind]++;
+      tensor_builder->notifyFirstUse(ind);
+    }
+
+    // At each operation,
+    // 1. Scan DEF of outputs. If the DEF, allocate it
+    // 2. Scan DEF of inputs. If variable tensor, allocate it
+    // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+    for (const auto op_ind : _data.op_order)
+    {
+      const auto &op = graph()->operations().at(op_ind);
+      auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+      auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
+
+      // Define outputs
+      for (const auto &ind : op_outputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(def_map.find(ind) != def_map.end());
+        if (def_map[ind])
+        {
+          def_map[ind] = 0;
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      // Scan variable tensors
+      // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
+      // non-constant because of less memory usage by memory planning in here
+      for (const auto &ind : op_inputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        const auto &operand = graph()->operands().at(ind);
+        if (operand.info().isVariable())
+        {
+          // The variable tensor with buffer is not supported yet
+          assert(operand.data() == nullptr);
+          assert(operand.getUses().size() == 1 && !operand.getDef().valid());
+          assert(uses_map[ind] == 1 && def_map[ind] == 0);
+          tensor_builder->notifyFirstUse(ind);
+        }
+      }
+
+      for (const auto &ind : op_inputs)
+      {
+        if (!tensor_builder->isRegistered(ind))
+          continue;
+        assert(uses_map.find(ind) != uses_map.end());
+        assert(uses_map[ind] > 0);
+        uses_map[ind]--;
+        if (uses_map[ind] == 0)
+        {
+          // plan for deallocation of static tensornode
+          tensor_builder->notifyLastUse(ind);
+        }
+      }
+    }
+
+    _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
+      if (uses_map[ind] == 0)
+      {
+        tensor_builder->notifyLastUse(ind);
+      }
+    });
+
+    // Dispose and validate
+    for (const auto &ind : constants)
+    {
+      --uses_map[ind];
+      if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
+      {
+        tensor_builder->notifyLastUse(ind);
+      }
+    }
+
+    assert(
+      std::all_of(uses_map.begin(), uses_map.end(),
+                  [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+
+    assert(
+      std::all_of(def_map.begin(), def_map.end(),
+                  [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
+  }
+
+public:
+  // TODO Make it protected
+  std::shared_ptr<T_TensorBuilder> tensor_builder;
+  std::shared_ptr<T_ConstantInitializer> constant_initializer;
+  std::shared_ptr<T_KernelGenerator> kernel_gen;
+};
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h
new file mode 100644 (file)
index 0000000..5fe5eec
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
+#define __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
+
+#include "cl_common/ParentInfo.h"
+
+#include <ir/OperandIndexMap.h>
+
+#include <map>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+// TODO Abstract UserType into LifetimeMap and LifetimeSeq
+enum class UsesType
+{
+  FIRST,
+  LAST
+};
+
+// TODO Define class or struct for LifetimeMap and LifetimeSeq
+using LifetimeMap = std::map<size_t, std::pair<UsesType, ir::OperandIndex>>;
+using LifetimeSeq = std::vector<std::pair<UsesType, ir::OperandIndex>>;
+
+LifetimeMap createLifetimeMap(LifetimeSeq &seq, ir::OperandIndexMap<ParentInfo> &parent_map);
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__
diff --git a/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h
new file mode 100644 (file)
index 0000000..510211c
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
+#define __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
+
+#include <ir/Index.h>
+#include <ir/Coordinates.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+/**
+ * @brief      Struct to represent parent operand in child operand
+ */
+struct ParentInfo
+{
+  ir::OperandIndex parent;
+  ir::Layout frontend_layout;
+  ir::Coordinates coordinates;
+};
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/cl_common/src/LifetimeMap.cc b/runtime/onert/backend/cl_common/src/LifetimeMap.cc
new file mode 100644 (file)
index 0000000..0b17c58
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cl_common/LifetimeMap.h"
+
+#include <unordered_map>
+
+namespace onert
+{
+namespace backend
+{
+namespace cl_common
+{
+
+LifetimeMap createLifetimeMap(LifetimeSeq &lifetime_seq,
+                              ir::OperandIndexMap<ParentInfo> &parent_map)
+{
+  // Update lifetime sequence to apply subtensor optimization
+  std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
+  std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
+    [&](ir::OperandIndex ind) -> ir::OperandIndex & {
+    ir::OperandIndex &ret = root_map[ind];
+
+    // We know the root parent value already
+    if (ret.valid())
+      return ret;
+
+    auto itr = parent_map.find(ind);
+    if (itr == parent_map.end())
+    {
+      // If there is no parent, let's store the value of itself
+      return ret = ind;
+    }
+    else
+    {
+      return ret = find_root(itr->second.parent);
+    }
+  };
+
+  ir::OperandIndexMap<bool> first_use_check;
+  ir::OperandIndexMap<bool> last_use_check;
+  LifetimeMap lifetime_map;
+  for (size_t i = 0; i < lifetime_seq.size(); i++)
+  {
+    auto &entry = lifetime_seq[i];
+    if (entry.first != UsesType::FIRST)
+      continue;
+    auto root_ind = find_root(entry.second);
+    if (first_use_check[root_ind])
+      continue;
+    first_use_check[root_ind] = true;
+    lifetime_map[i] = {UsesType::FIRST, root_ind};
+  }
+
+  for (int i = lifetime_seq.size() - 1; i >= 0; i--)
+  {
+    auto &entry = lifetime_seq[i];
+    if (entry.first != UsesType::LAST)
+      continue;
+    auto root_ind = find_root(entry.second);
+    if (last_use_check[root_ind])
+      continue;
+    last_use_check[root_ind] = true;
+    lifetime_map[i] = {UsesType::LAST, root_ind};
+  }
+
+  return lifetime_map;
+}
+
+} // namespace cl_common
+} // namespace backend
+} // namespace onert
index 8ac8758421ad1b803ef886120611f38be6caaac7..aa4ef352ef5ebd5495c33d76604ac568d331c43e 100644 (file)
@@ -194,7 +194,7 @@ void CalculateActivationRangeQuantized(ir::Activation activation, const IPortabl
   }
   else
   {
-    std::cout << "Unsupported fused activation function." << std::endl;
+    throw std::runtime_error{"Unsupported fused activation function."};
   }
 }
 
index ac2fbb84f58181e0ac6369989bf67b619153b706..1fefc3228b8428c7bdcd55d67196acf3c9d9d0c8 100644 (file)
 #define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__
 
 #include <backend/IPortableTensor.h>
-
-#include <cker/Shape.h>
-#include <cker/Types.h>
-#include <iostream>
 #include <ir/DataType.h>
-#include <ir/InternalType.h>
 #include <ir/Operand.h>
 #include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
+
+#include <cker/Shape.h>
+#include <cker/Types.h>
 
 #include <limits>
 #include <vector>
 
 using OperandType = onert::ir::DataType;
+using namespace onert::util;
 
 namespace onert
 {
@@ -166,40 +166,6 @@ void GetQuantizedConvolutionMultipliersAndShifts(
   int num_channels, std::vector<int32_t> &per_channel_output_multiplier,
   std::vector<int> &per_channel_output_shift);
 
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
-  if (activation == ir::Activation::RELU)
-  {
-    *activation_min = 0;
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else if (activation == ir::Activation::RELU6)
-  {
-    *activation_min = 0;
-    *activation_max = 6;
-  }
-  else if (activation == ir::Activation::RELU1)
-  {
-    *activation_min = -1;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::SIGMOID)
-  {
-    *activation_min = 0;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::NONE)
-  {
-    *activation_min = std::numeric_limits<T>::lowest();
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else
-  {
-    std::cout << "Unsupported fused activation function." << std::endl;
-  }
-}
-
 void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output,
                                        int32_t *act_min, int32_t *act_max);
 
index dc0b8596cebe02543511e6440cfc6b98d3af1946..d67ba1602df4b3218d8147b43f61ba8dc324e185 100644 (file)
 
 #include "BackendContext.h"
 #include "Config.h"
-#include "ClTensorRegistry.h"
+#include "TensorRegistry.h"
 #include "KernelGenerator.h"
 #include "TensorManager.h"
 #include "TensorBuilder.h"
 
-#include "open_cl/Environment.h"
-#include "open_cl/Status.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace onert
 {
@@ -50,22 +50,22 @@ public:
     const auto &operands = data.graph->operands();
     auto context = std::make_unique<gpu_cl::BackendContext>(this, std::move(data));
 
-    auto environment = std::make_shared<Environment>();
+    auto environment = std::make_shared<tflite::gpu::cl::Environment>();
     if (!CreateEnvironment(environment.get()).ok())
     {
       return nullptr;
     }
     auto tm = createTensorManager(&environment->context());
 
-    auto tr = std::make_shared<ClTensorRegistry<TensorManager>>(tm);
+    auto tr = std::make_shared<TensorRegistry>(tm);
 
-    InferenceContext::CreateInferenceInfo create_info;
-    create_info.precision = CalculationsPrecision::F32;
+    tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info;
+    create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32;
     create_info.storage_type =
-      GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
-    create_info.hints.Add(ModelHints::kFastestInference);
+      tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo());
+    create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference);
 
-    auto cc = std::make_shared<CreationContext>();
+    auto cc = std::make_shared<tflite::gpu::cl::CreationContext>();
     cc->device = environment->GetDevicePtr();
     cc->context = &environment->context();
     cc->queue = environment->queue();
index 6c3ac81a289f5dca26292889b25d8af42246f142..ec944215562d8d2b56ad81b2b3087e1dda529de5 100644 (file)
@@ -33,147 +33,26 @@ namespace backend
 namespace gpu_cl
 {
 
-void BackendContext::initConsts()
+void BackendContext::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                        ir::Layout backend_layout)
 {
-  _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) {
-    constant_initializer->setLayout(graph()->layout());
-    op.accept(*constant_initializer);
-  });
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
-    if (_data.external_operands.contains(ind) || !operand.isConstant())
-      return;
-    const auto &obj = graph()->operands().at(ind);
-    if (obj.isConstant() && !constant_initializer->exist(ind))
-    {
-      constant_initializer->registerDefaultInitializer(ind, obj);
-    }
-  });
-
-  constant_initializer->run();
+  TensorType type = TensorType::TENSOR_TYPE_VALID;
+  tensor_builder->registerTensorInfo(ind, info, backend_layout, type);
 }
 
-void BackendContext::planTensors()
+ITensorRegistry *BackendContext::genTensors()
 {
-  ir::OperandIndexMap<uint32_t> uses_map;
-  ir::OperandIndexMap<uint32_t> def_map;
-  ir::OperandIndexSequence constants;
-
-  // Prepare scanning
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
-    if (_data.external_operands.contains(ind))
-      return;
-    uses_map[ind] = obj.getUses().size();
-    def_map[ind] = obj.getDef().valid() ? 1 : 0;
-
-    if (obj.isConstant())
-      constants.append(ind);
-
-    if (!tensor_builder->isRegistered(ind))
-    {
-      // These tensors do not exist in any operation (No use and def)
-      const auto info = obj.info();
-      const auto layout = _data.operand_layouts.at(ind);
-      // TODO Change tensor info to have permuted shape
-      tensor_builder->registerTensorInfo(ind, info, layout);
-    }
-  });
-
-  // Start scanning to do notify{First|Last}Use for each tensor
+  ir::OperandIndexMap<TensorType> type_map;
 
-  // If a tensor is a constant, increase the use of the tensor and allocate it first.
-  // Increasing use count here makes the tensor never be deallocated, i.e it they will be
-  // deallocated last.
-  VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl;
-  for (const auto &ind : constants)
+  for (const auto &ind : graph()->getInputs())
   {
-    uses_map[ind]++;
-    tensor_builder->notifyFirstUse(ind);
+    type_map[ind] = TensorType::TENSOR_TYPE_INPUT;
   }
 
-  // At each operation,
-  // 1. Scan DEF of outputs. If the DEF, allocate it
-  // 2. Scan DEF of inputs. If variable tensor, allocate it
-  // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
-  for (const auto op_ind : _data.op_order)
+  for (const auto &ind : graph()->getOutputs())
   {
-    const auto &op = graph()->operations().at(op_ind);
-    auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-    auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
-
-    // Define outputs
-    for (const auto &ind : op_outputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      assert(def_map.find(ind) != def_map.end());
-      if (def_map[ind])
-      {
-        def_map[ind] = 0;
-        tensor_builder->notifyFirstUse(ind);
-      }
-    }
-
-    // Scan variable tensors
-    // This tensor has features like constant. But OperandInfo and LowerInfo treat them as
-    // non-constant because of less memory usage by memory planning in here
-    for (const auto &ind : op_inputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      const auto &operand = graph()->operands().at(ind);
-      if (operand.info().isVariable())
-      {
-        // The variable tensor with buffer is not supported yet
-        assert(operand.data() == nullptr);
-        assert(operand.getUses().size() == 1 && !operand.getDef().valid());
-        assert(uses_map[ind] == 1 && def_map[ind] == 0);
-        tensor_builder->notifyFirstUse(ind);
-      }
-    }
-
-    for (const auto &ind : op_inputs)
-    {
-      if (!tensor_builder->isRegistered(ind))
-        continue;
-      assert(uses_map.find(ind) != uses_map.end());
-      assert(uses_map[ind] > 0);
-      uses_map[ind]--;
-      if (uses_map[ind] == 0)
-      {
-        // plan for deallocation of static tensornode
-        tensor_builder->notifyLastUse(ind);
-      }
-    }
+    type_map[ind] = TensorType::TENSOR_TYPE_OUTPUT;
   }
-
-  _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) {
-    if (uses_map[ind] == 0)
-    {
-      tensor_builder->notifyLastUse(ind);
-    }
-  });
-
-  // Dispose and validate
-  for (const auto &ind : constants)
-  {
-    --uses_map[ind];
-    if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice
-    {
-      tensor_builder->notifyLastUse(ind);
-    }
-  }
-
-  assert(
-    std::all_of(uses_map.begin(), uses_map.end(),
-                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-
-  assert(
-    std::all_of(def_map.begin(), def_map.end(),
-                [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
-}
-
-ITensorRegistry *BackendContext::genTensors()
-{
   graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
     if (external_operands().contains(ind))
       return;
@@ -182,7 +61,11 @@ ITensorRegistry *BackendContext::genTensors()
     const auto backend_layout = operand_layouts().at(ind);
     ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout),
                                  obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()};
-    tensor_builder->registerTensorInfo(ind, backend_info, backend_layout);
+    if (obj.isConstant())
+    {
+      type_map[ind] = TensorType::TENSOR_TYPE_INPUT;
+    }
+    tensor_builder->registerTensorInfo(ind, backend_info, backend_layout, type_map[ind]);
   });
 
   // TODO Get compiler options from compiler, and use it rather than getting it from Env
@@ -199,44 +82,10 @@ ITensorRegistry *BackendContext::genTensors()
         tensor_builder->notifyFirstUse(ind);
     });
   }
-
   tensor_builder->prepare();
-
   return tensor_registry.get();
 }
 
-FunctionMap BackendContext::genKernels()
-{
-  FunctionMap ret;
-
-  // kernel_gen
-  for (auto op_ind : _data.op_order)
-  {
-    auto fn_seq = kernel_gen->generate(op_ind);
-    ret.emplace_back(op_ind, std::move(fn_seq));
-  }
-
-  tensor_builder->allocate();
-
-  initConsts();
-
-  // NOTE For memory optimization, we want to free some operand data
-  const_cast<ir::Graph &>(*_data.graph)
-    .operands()
-    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
-
-  for (auto &it : ret)
-  {
-    auto &fn_seq = it.second;
-    fn_seq->iterate([&](exec::IFunction &ifunc) {
-      ifunc.prepare();
-      tensor_builder->postFunctionPrepare();
-    });
-  }
-
-  return ret;
-}
-
 } // namespace gpu_cl
 } // namespace backend
 } // namespace onert
index f17489e7ac754792674a2c96582f14e5fba6c865..7412d2bce8f1d055b8fecd87c56c5bbcb39fb101 100644 (file)
 #include <backend/BackendContext.h>
 #include <util/ConfigSource.h>
 
+#include <cl_common/BackendContext.h>
+
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
 #include "TensorBuilder.h"
-#include "open_cl/InferenceContext.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
 
 namespace onert
 {
@@ -32,31 +34,28 @@ namespace backend
 namespace gpu_cl
 {
 
-class BackendContext : public onert::backend::BackendContext
+class BackendContext
+  : public onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+                                                     KernelGenerator>
 {
 public:
   BackendContext(const Backend *backend, ContextData &&data,
-                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorRegistry> tensor_registry = nullptr,
                  std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
                  std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
                  std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
-    : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
-      tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
-                                                                                    kernel_gen}
+    : onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+                                                KernelGenerator>(
+        backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
   {
+    // DO NOTHING
   }
 
   ITensorRegistry *genTensors() override;
-  FunctionMap genKernels() override;
-
-private:
-  void initConsts();
-  void planTensors();
 
-public:
-  std::shared_ptr<TensorBuilder> tensor_builder;
-  std::shared_ptr<ConstantInitializer> constant_initializer;
-  std::shared_ptr<KernelGenerator> kernel_gen;
+protected:
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout) override;
 };
 
 } // namespace gpu_cl
index 49bae37f8fd7193fe411f69b6ac098bcb2927fa7..eb19642142bcacb636992210dc20a5676a3925b7 100644 (file)
@@ -1,14 +1,14 @@
 set(LIB_ONERT_BACKEND_GPU_CL onert_backend_gpu_cl)
 
+if(NOT BUILD_GPU_CL)
+  return()
+endif(NOT BUILD_GPU_CL)
+
 nnas_find_package(Opencl_Headers QUIET)
 if(NOT Opencl_Headers_FOUND)
   return()
 endif(NOT Opencl_Headers_FOUND)
 
-if(NOT BUILD_GPU_CL)
-  return()
-endif(NOT BUILD_GPU_CL)
-
 nnas_find_package(Farmhash QUIET)
 if(NOT Farmhash_FOUND)
   return()
@@ -19,18 +19,32 @@ if(NOT Abseil_FOUND)
   return()
 endif(NOT Abseil_FOUND)
 
-file(GLOB_RECURSE SOURCES "*.cc")
+nnfw_find_package(Fp16 QUIET)
+if(NOT Fp16_FOUND)
+  return()
+endif(NOT Fp16_FOUND)
 
+nnas_find_package(TensorFlowGpu QUIET)
+if(NOT TensorFlowGpu_FOUND)
+  message(FATAL_ERROR 'TensorFlowGpu lib not found')
+  return()
+endif(NOT TensorFlowGpu_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
 
 add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
 
 target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR})
 
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
-target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON})
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage)
 
index b3ef2f560ca429b76e6880131a074829e71c92dc..05dd8e2a321dacebd68b272890e6b0e0dd3c9192 100644 (file)
@@ -93,6 +93,9 @@ void ClConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &i
     case DataType::FLOAT32:
       _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout);
       break;
+    case DataType::INT32:
+      _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout);
+      break;
     default:
       throw std::runtime_error("Not supported, yet");
       break;
index d7d21e847758bf4faaeb08ccdde33e0512e5c128..95e228acd181dbb2ce44458c2d612b7234afff9f 100644 (file)
@@ -17,8 +17,6 @@
 #ifndef __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__
 #define __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__
 
-#include "ClTensorRegistry.h"
-
 #include <unordered_map>
 #include <functional>
 
index 9d3d69092d73c728bb05b336465a91e513a76c8f..5e8a11a8405d67b243802eca439ae92112645dbd 100644 (file)
@@ -22,9 +22,9 @@
 #include <vector>
 #include <memory>
 
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/Status.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
 
 namespace onert
 {
@@ -32,19 +32,18 @@ namespace backend
 {
 namespace gpu_cl
 {
-
 class ClFunction : public ::onert::exec::IFunction
 {
 public:
   ClFunction() : _gpu_operations(), _creation_context() {}
 
 public:
-  void configure(std::shared_ptr<CreationContext> creation_context)
+  void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context)
   {
     _creation_context = creation_context;
   }
 
-  void add_operation(std::unique_ptr<GPUOperation> gpu_operation)
+  void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation)
   {
     _gpu_operations.push_back(std::move(gpu_operation));
   }
@@ -57,6 +56,10 @@ public:
       {
         throw std::runtime_error("Failed to AddToQueue.");
       }
+      if (!_creation_context->queue->WaitForCompletion().ok())
+      {
+        throw std::runtime_error("Failed to WaitForCompletion.");
+      }
     }
   }
 
@@ -77,8 +80,8 @@ public:
   }
 
 private:
-  std::vector<std::unique_ptr<GPUOperation>> _gpu_operations;
-  std::shared_ptr<CreationContext> _creation_context;
+  std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations;
+  std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
 };
 
 } // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/ClMemoryManager.h b/runtime/onert/backend/gpu_cl/ClMemoryManager.h
deleted file mode 100644 (file)
index 3bac0d5..0000000
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
-#define __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
-
-#include <cassert>
-
-#include "ir/OperandIndexMap.h"
-#include "ir/Shape.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/InferenceContext.h"
-#include "open_cl/Status.h"
-#include "open_cl/StorageTypeUtil.h"
-#include "open_cl/TensorType.h"
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor> class ClMemoryManager
-{
-public:
-  ClMemoryManager(CLContext *context) : _context{context} {}
-
-  virtual ~ClMemoryManager() = default;
-
-  virtual void allocate(void)
-  {
-    for (const auto &tensor_entry : _tensors)
-    {
-      auto tensor = tensor_entry.second;
-      const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
-      const auto &shape = t->shape;
-      const auto &descriptor = t->descriptor;
-      if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
-      {
-        return;
-      }
-    }
-  }
-
-  virtual void deallocate(void)
-  {
-    // NYI
-  }
-
-  virtual void startLifetime(const ir::OperandIndex &)
-  { /* DO NOTHING */
-  }
-  virtual void finishLifetime(const ir::OperandIndex &)
-  { /* DO NOTHING */
-  }
-
-  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                   InferenceContext::CreateInferenceInfo create_info,
-                   std::shared_ptr<Environment> environment, DeviceInfo &device_info)
-  {
-    ValueId max_id = 0;
-    auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
-    const auto shape = info.shape();
-
-    auto tensor = std::make_shared<T_Tensor>(shape.rank(), shape, environment);
-    _tensors[ind] = tensor;
-
-    BHWC t_shape;
-    switch (shape.rank())
-    {
-      case 1:
-        // B layout
-        t_shape = BHWC(shape.dim(0), 1, 1, 1);
-        break;
-      case 2:
-        // BC layout
-        t_shape = BHWC(shape.dim(0), 1, 1, shape.dim(1));
-        break;
-      case 3:
-        // BWC layout
-        t_shape = BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
-        break;
-      case 4:
-        // BHWC layout
-        t_shape = BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
-        break;
-      default:
-        break;
-    }
-
-    TensorStorageType storage_type = create_info.storage_type;
-    Layout layout = t_shape.b == 1 ? Layout::HWC : Layout::BHWC;
-
-    ValueId id = ind.value();
-    storage_type = SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
-    auto dummy = std::make_shared<InferenceContext::DummyTensor>();
-    dummy->shape = t_shape;
-    dummy->descriptor = TensorDescriptor{data_type, storage_type, layout};
-    tensor_reserver_.Add(id, dummy);
-
-    max_id = std::max(max_id, id);
-
-    tensor_reserver_.SetNext(max_id + 1);
-  }
-
-  ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; }
-
-  InferenceContext::TensorReserver &tensorReservers(void) { return tensor_reserver_; }
-
-private:
-  ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors;
-  InferenceContext::TensorReserver tensor_reserver_;
-  CLContext *_context;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h b/runtime/onert/backend/gpu_cl/ClTensorBuilder.h
deleted file mode 100644 (file)
index 951bbd8..0000000
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_CL_TENSOR_BUILDER_H__
-#define __ONERT_BACKEND_CL_TENSOR_BUILDER_H__
-
-#include <memory>
-#include <queue>
-
-#include "ClTensorManager.h"
-#include "ClTensorRegistry.h"
-#include "ParentInfo.h"
-
-#include "open_cl/TensorType.h"
-#include "open_cl/TensorTypeUtil.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/InferenceContext.h"
-
-#include "ir/OperandIndexMap.h"
-#include "ir/OperandIndexSequence.h"
-#include <ir/Operands.h>
-#include <util/Utils.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class UsesType
-{
-  FIRST,
-  LAST
-};
-
-template <typename T_ITensor, typename T_Tensor> class ClTensorBuilder
-{
-public:
-  using T_ClTensorManager = ClTensorManager<T_ITensor, T_Tensor>;
-
-  ClTensorBuilder(const ir::Operands &operands, T_ClTensorManager *tensor_mgr,
-                  InferenceContext::CreateInferenceInfo create_info,
-                  const std::shared_ptr<Environment> &environment);
-
-  /**
-   * @brief     Register tensor information to allocate on ACL-CL backend
-   * @param[in] ind    Operand index
-   * @param[in] info   Tensor information
-   * @param[in] layout Tensor data layout
-   */
-  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                          ir::Layout backend_layout);
-
-  void notifyFirstUse(const ir::OperandIndex &);
-  void notifyLastUse(const ir::OperandIndex &);
-
-  bool isRegistered(const ir::OperandIndex &) const;
-
-  void prepare();
-  void allocate();
-  void postFunctionPrepare();
-
-  T_ClTensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); }
-
-  void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
-  {
-    assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
-                                                                : true);
-    _uses_count_map[index] = num_uses;
-  }
-
-  void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map)
-  {
-    _parent_map = std::move(parent_map);
-  }
-
-  bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
-
-  /**
-   * @brief     Check child tensor is allocated as subtensor of parent tensor
-   * @param[in] parent  Index of parent
-   * @param[in] child   Index of child
-   * @return    @c true if child is allocated as subtensor of parent, otherwise @c false
-   */
-  bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
-
-private:
-  void buildTensors(void);
-  ir::OperandIndex findRootParent(ir::OperandIndex index);
-
-private:
-  const ir::Operands &_operands;
-  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
-  ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
-  ir::OperandIndexMap<size_t> _uses_count_map;
-
-  std::unique_ptr<T_ClTensorManager> _tensor_mgr;
-  InferenceContext::CreateInferenceInfo _create_info;
-  std::shared_ptr<Environment> _environment;
-
-  // for linear executor
-  std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq;
-
-  // Extra info for concat elimination
-  ir::OperandIndexMap<ParentInfo> _parent_map;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#include <cassert>
-#include <stack>
-
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor>
-ClTensorBuilder<T_ITensor, T_Tensor>::ClTensorBuilder(
-  const ir::Operands &operands, T_ClTensorManager *tensor_mgr,
-  InferenceContext::CreateInferenceInfo create_info,
-  const std::shared_ptr<Environment> &environment)
-  : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
-                                                                               environment}
-{
-  assert(_tensor_mgr);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::registerTensorInfo(const ir::OperandIndex &ind,
-                                                              const ir::OperandInfo &info,
-                                                              ir::Layout backend_layout)
-{
-  assert(_tensor_mgr->constTensors().size() == 0);
-  assert(_tensor_mgr->nonconstTensors().size() == 0);
-
-  _uses_count_map[ind] = _operands.at(ind).getUses().size();
-
-  _tensor_info_map.emplace(ind, info);
-  _tensor_layout_map.insert({ind, backend_layout});
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::notifyFirstUse(const ir::OperandIndex &ind)
-{
-  _lifetime_seq.emplace_back(UsesType::FIRST, ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::notifyLastUse(const ir::OperandIndex &ind)
-{
-  _lifetime_seq.emplace_back(UsesType::LAST, ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-bool ClTensorBuilder<T_ITensor, T_Tensor>::isRegistered(const ir::OperandIndex &ind) const
-{
-  return _tensor_info_map.find(ind) != _tensor_info_map.end();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::prepare(void)
-{
-  buildTensors();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::allocate(void)
-{
-  // Update lifetime sequence to apply subtensor optimization
-
-  std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map;
-  std::function<ir::OperandIndex &(ir::OperandIndex)> find_root =
-    [&](ir::OperandIndex ind) -> ir::OperandIndex & {
-    ir::OperandIndex &ret = root_map[ind];
-
-    // We know the root parent value already
-    if (ret.valid())
-      return ret;
-
-    auto itr = _parent_map.find(ind);
-    if (itr == _parent_map.end())
-    {
-      // If there is no parent, let's store the value of itself
-      return ret = ind;
-    }
-    else
-    {
-      return ret = find_root(itr->second.parent);
-    }
-  };
-
-  ir::OperandIndexMap<bool> first_use_check;
-  ir::OperandIndexMap<bool> last_use_check;
-  std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map;
-  for (size_t i = 0; i < _lifetime_seq.size(); i++)
-  {
-    auto &entry = _lifetime_seq[i];
-    if (entry.first != UsesType::FIRST)
-      continue;
-    auto root_ind = find_root(entry.second);
-    if (first_use_check[root_ind])
-      continue;
-    first_use_check[root_ind] = true;
-    lifetime_map[i] = {UsesType::FIRST, root_ind};
-  }
-
-  for (int i = _lifetime_seq.size() - 1; i >= 0; i--)
-  {
-    auto &entry = _lifetime_seq[i];
-    if (entry.first != UsesType::LAST)
-      continue;
-    auto root_ind = find_root(entry.second);
-    if (last_use_check[root_ind])
-      continue;
-    last_use_check[root_ind] = true;
-    lifetime_map[i] = {UsesType::LAST, root_ind};
-  }
-
-  for (auto &entry : lifetime_map)
-  {
-    auto &use = entry.second;
-    auto use_type = use.first;
-    auto use_index = use.second;
-    assert(use_index.valid());
-    if (use_type == UsesType::FIRST)
-      _tensor_mgr->startLifetime(use_index);
-    else
-      _tensor_mgr->finishLifetime(use_index);
-  }
-
-  _tensor_mgr->allocateConsts();
-
-  // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses
-  //      After refactoring BackendContext we can uncomment this
-  // assert(_tensor_info_map.size() ==
-  //       _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
-  //       _parent_map.size());
-  _tensor_mgr->allocateNonconsts();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::postFunctionPrepare(void)
-{
-  _tensor_mgr->tryDeallocConstants();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorBuilder<T_ITensor, T_Tensor>::buildTensors(void)
-{
-  assert(_tensor_mgr->constTensors().size() == 0);
-  assert(_tensor_mgr->nonconstTensors().size() == 0);
-  // Normal tensors
-  for (auto &entry : _tensor_info_map)
-  {
-    auto ind = entry.first;
-    if (_parent_map.count(ind) > 0)
-      continue;
-
-    const auto &info = entry.second;
-    _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_);
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/gpu_cl/ClTensorManager.h b/runtime/onert/backend/gpu_cl/ClTensorManager.h
deleted file mode 100644 (file)
index 49a1173..0000000
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
-#define __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
-
-#include "ClMemoryManager.h"
-
-#include "open_cl/InferenceContext.h"
-#include "open_cl/TensorType.h"
-
-#include "ir/OperandInfo.h"
-#include "ir/OperandIndexMap.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor> class ClTensorManager
-{
-public:
-  using T_ClMemoryManager = ClMemoryManager<T_ITensor, T_Tensor>;
-
-  ClTensorManager(T_ClMemoryManager *const_mgr, T_ClMemoryManager *nonconst_mgr);
-
-  virtual ~ClTensorManager() = default;
-
-  void allocateConsts(void);
-  void allocateNonconsts(void);
-  void deallocateConsts(void);
-  void deallocateNonconsts(void);
-
-  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
-                   InferenceContext::CreateInferenceInfo create_info,
-                   std::shared_ptr<Environment> environment, DeviceInfo &device_info);
-
-  std::shared_ptr<T_ITensor> findTensorAsParent(const ir::OperandIndex &ind);
-
-  void startLifetime(const ir::OperandIndex &ind);
-  void finishLifetime(const ir::OperandIndex &ind);
-
-  std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind);
-  std::shared_ptr<InferenceContext::DummyTensor> atR(const ir::OperandIndex &ind);
-
-  InferenceContext::TensorReserver &constTensorReservers(void);
-  InferenceContext::TensorReserver &nonconstTensorReservers(void);
-
-  ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void);
-  ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void);
-
-  void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
-
-  void tryDeallocConstants(void);
-
-private:
-  std::unique_ptr<T_ClMemoryManager> _const_mgr;
-  std::unique_ptr<T_ClMemoryManager> _nonconst_mgr;
-  ir::OperandIndexMap<T_ClMemoryManager &> _ind_to_mgr;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#include <cassert>
-#include "util/logging.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <typename T_ITensor, typename T_Tensor>
-ClTensorManager<T_ITensor, T_Tensor>::ClTensorManager(T_ClMemoryManager *const_mgr,
-                                                      T_ClMemoryManager *nonconst_mgr)
-  : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}
-{
-  // DO NOTHING
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::allocateConsts(void)
-{
-  _const_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::allocateNonconsts(void)
-{
-  _nonconst_mgr->allocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::deallocateConsts(void)
-{
-  _const_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::deallocateNonconsts(void)
-{
-  _nonconst_mgr->deallocate();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::buildTensor(
-  const ir::OperandIndex &ind, const ir::OperandInfo &info,
-  InferenceContext::CreateInferenceInfo create_info, std::shared_ptr<Environment> environment,
-  DeviceInfo &device_info)
-{
-  assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
-
-  if (info.isConstant())
-  {
-    _const_mgr->buildTensor(ind, info, create_info, environment, device_info);
-    _ind_to_mgr.insert({ind, *_const_mgr});
-  }
-  else
-  {
-    _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info);
-    _ind_to_mgr.insert({ind, *_nonconst_mgr});
-  }
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::startLifetime(const ir::OperandIndex &ind)
-{
-  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
-  _ind_to_mgr.at(ind).startLifetime(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::finishLifetime(const ir::OperandIndex &ind)
-{
-  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
-  _ind_to_mgr.at(ind).finishLifetime(ind);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-std::shared_ptr<T_ITensor> ClTensorManager<T_ITensor, T_Tensor>::at(const ir::OperandIndex &ind)
-{
-  if (_ind_to_mgr.find(ind) == _ind_to_mgr.end())
-    return nullptr;
-
-  auto &tensors = _ind_to_mgr.at(ind).tensors();
-  if (tensors.find(ind) != tensors.end())
-  {
-    return tensors.at(ind);
-  }
-
-  return nullptr;
-}
-
-template <typename T_ITensor, typename T_Tensor>
-ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
-ClTensorManager<T_ITensor, T_Tensor>::constTensors(void)
-{
-  return _const_mgr->tensors();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &
-ClTensorManager<T_ITensor, T_Tensor>::nonconstTensors(void)
-{
-  return _nonconst_mgr->tensors();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-std::shared_ptr<InferenceContext::DummyTensor>
-ClTensorManager<T_ITensor, T_Tensor>::atR(const ir::OperandIndex &ind)
-{
-  if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
-  {
-    return _nonconst_mgr->tensorReservers().Get(ind.value());
-  }
-  else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
-  {
-    return _const_mgr->tensorReservers().Get(ind.value());
-  }
-  return nullptr;
-}
-
-template <typename T_ITensor, typename T_Tensor>
-InferenceContext::TensorReserver &ClTensorManager<T_ITensor, T_Tensor>::constTensorReservers(void)
-{
-  return _const_mgr->tensorReservers();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-InferenceContext::TensorReserver &
-ClTensorManager<T_ITensor, T_Tensor>::nonconstTensorReservers(void)
-{
-  return _nonconst_mgr->tensorReservers();
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::iterate(
-  const std::function<void(const ir::OperandIndex &)> &fn)
-{
-  for (auto it : _nonconst_mgr->tensors())
-    fn(it.first);
-
-  for (auto it : _const_mgr->tensors())
-    fn(it.first);
-}
-
-template <typename T_ITensor, typename T_Tensor>
-void ClTensorManager<T_ITensor, T_Tensor>::tryDeallocConstants(void)
-{
-  // NYI
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h b/runtime/onert/backend/gpu_cl/ClTensorRegistry.h
deleted file mode 100644 (file)
index 1f0018b..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
-#define __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
-
-#include "backend/ITensorRegistry.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-/**
- * @brief Tensor registry class for acl backends
- *
- * This is implemented as a wrapper of AclTensorManager.
- */
-template <typename T_ClTensorManager> class ClTensorRegistry : public ITensorRegistry
-{
-public:
-  ClTensorRegistry(T_ClTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
-
-  ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); }
-
-  ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getITensor(ind); }
-
-  auto getClTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
-
-  auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); }
-
-private:
-  T_ClTensorManager *_tensor_mgr;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
index 067a2070fe5d1e8704f55aa1e81ad9c3a03d3dd6..9959a471b9810967ac7bb230eecace61df55c509 100644 (file)
 #include "Config.h"
 
 #include <dlfcn.h>
-#include "open_cl/OpenclWrapper.h"
-#include "open_cl/Status.h"
+
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+
+using namespace tflite::gpu::cl;
 
 namespace onert
 {
@@ -26,12 +29,9 @@ namespace backend
 {
 namespace gpu_cl
 {
-
-Config::~Config() { UnloadOpenCL(_handle); }
-
 bool Config::initialize()
 {
-  if (LoadOpenCL(&_handle).ok())
+  if (LoadOpenCL().ok())
   {
     return true;
   }
index aa5a51a1573298e71fee7bf70bbd18a4376d5fb4..6a455bbb5e98cb905fa77e46178540c63dd1314f 100644 (file)
@@ -31,7 +31,7 @@ namespace gpu_cl
 class Config : public IConfig
 {
 public:
-  virtual ~Config();
+  virtual ~Config() {}
 
 public:
   std::string id() override { return "gpu_cl"; }
index a84867f8cceb7392003f9c50994f896450e9fefb..04edc392863950dc2fb01d271d70c8a00a1630a3 100644 (file)
 
 #include "KernelGenerator.h"
 
-#include "ClTensorRegistry.h"
 #include "ClFunction.h"
 #include "TensorManager.h"
 
-#include "open_cl/selectors/ConvolutionSelector.h"
-#include "open_cl/selectors/DwConvolutionSelector.h"
-#include "open_cl/selectors/SimpleSelectors.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h"
+#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h"
 
 #include "ir/Operations.h"
 #include "ir/Operations.Include.h"
@@ -37,6 +38,9 @@
 #include "util/logging.h"
 #include "util/Utils.h"
 
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+
 namespace onert
 {
 namespace backend
@@ -60,14 +64,14 @@ void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *a
   }
 }
 
-gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
+PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
 {
   switch (type_ir)
   {
     case ir::operation::Pool2D::PoolType::AVG:
-      return gpu_cl::PoolingType::AVERAGE;
+      return PoolingType::AVERAGE;
     case ir::operation::Pool2D::PoolType::MAX:
-      return gpu_cl::PoolingType::MAX;
+      return PoolingType::MAX;
     default:
       throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet");
   }
@@ -75,7 +79,7 @@ gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir)
 
 KernelGenerator::KernelGenerator(const ir::Graph &graph,
                                  const std::shared_ptr<TensorBuilder> &tensor_builder,
-                                 const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg,
+                                 const std::shared_ptr<TensorRegistry> &tensor_reg,
                                  const std::shared_ptr<CreationContext> &creation_context)
   : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()),
     _operations_ctx(graph.operations()), _current_layout{graph.layout()},
@@ -190,7 +194,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
   auto bias_tensor = _tensor_reg->getClTensor(bias);
   auto output_tensor = _tensor_reg->getClTensor(output);
 
-  gpu_cl::Convolution2DAttributes attr;
+  Convolution2DAttributes attr;
   attr.strides = ToHW(param.stride.vertical, param.stride.horizontal);
   attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor),
                       std::max(static_cast<u_int32_t>(1), param.dilation.width_factor));
@@ -237,7 +241,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node)
     {
       std::unique_ptr<GPUOperation> gpu_op_1;
       OperationDef op_def_1;
-      std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>();
+      std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
 
       _new_tensors[output] = new_tensor;
       if (!CreateTensor(*_creation_context->context, output_shape,
@@ -334,9 +338,9 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
     const int filter_width = ker_shape.w;
     const int output_depth = out_shape.c;
 
-    InternalTensor<OHWI, DataType::FLOAT32> weights;
+    tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights;
     weights.id = attr.weights.id;
-    weights.shape = OHWI(output_depth, filter_height, filter_width, input_depth);
+    weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth);
     weights.data.resize(weights.shape.DimensionsProduct());
     float *dst = &weights.data[0];
     for (int j = 0; j < output_depth; ++j)
@@ -387,7 +391,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
     {
       std::unique_ptr<GPUOperation> gpu_op_1;
       OperationDef op_def_1;
-      std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>();
+      std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>();
 
       _new_tensors[ofm_index] = new_tensor;
       if (!CreateTensor(*_creation_context->context, out_shape,
index 3e341b111588a2d60b082fed4e38a27f2c4a0231..91fd3cd9d273771232693cb8ba9c52b124f0ebfc 100644 (file)
 #ifndef __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
 #define __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__
 
-#include "ClTensorRegistry.h"
+#include "TensorRegistry.h"
 #include "backend/basic/TensorRegistry.h"
 #include "TensorBuilder.h"
 #include "TensorManager.h"
 
+#include "tensorflow/lite/delegates/gpu/api.h"
+
 #include <backend/CustomKernelBuilder.h>
 #include <backend/basic/KernelGeneratorBase.h>
 #include <ir/Operands.h>
@@ -39,8 +41,8 @@ class KernelGenerator : public basic::KernelGeneratorBase
 {
 public:
   KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
-                  const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg,
-                  const std::shared_ptr<CreationContext> &creation_context);
+                  const std::shared_ptr<TensorRegistry> &tensor_reg,
+                  const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context);
 
   std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override;
 
@@ -58,9 +60,9 @@ private:
   const ir::Operations &_operations_ctx;
   ir::Layout _current_layout;
   std::shared_ptr<TensorBuilder> _tensor_builder;
-  std::shared_ptr<ClTensorRegistry<TensorManager>> _tensor_reg;
-  std::shared_ptr<CreationContext> _creation_context;
-  ir::OperandIndexMap<std::shared_ptr<Tensor>> _new_tensors;
+  std::shared_ptr<TensorRegistry> _tensor_reg;
+  std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context;
+  ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors;
 };
 
 } // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/MemoryManager.h b/runtime/onert/backend/gpu_cl/MemoryManager.h
new file mode 100644 (file)
index 0000000..a3b9b39
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+
+#include "ex/InferenceContextEx.h"
+#include "operand/CLTensor.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "util/logging.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class MemoryManager
+{
+public:
+  MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {}
+
+  ~MemoryManager() = default;
+
+  void allocate(void)
+  {
+    for (const auto &tensor_entry : _tensors)
+    {
+      auto tensor = tensor_entry.second;
+      auto type = tensor->get_type();
+
+      // if (type == TensorType::TENSOR_TYPE_DELETE) {
+      //   continue;
+      // }
+
+      const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
+      const auto &shape = t->shape;
+      const auto &descriptor = t->descriptor;
+      if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
+      {
+        std::runtime_error("Failed to CreateTensor");
+      }
+      switch (type)
+      {
+        case TensorType::TENSOR_TYPE_INPUT:
+          tensor->writeConvertInit();
+          break;
+        case TensorType::TENSOR_TYPE_OUTPUT:
+          tensor->readConvertInit();
+          break;
+        default:
+          break;
+      }
+    }
+  }
+
+  void deallocate(void)
+  {
+    // NYI
+  }
+
+  void startLifetime(const ir::OperandIndex &)
+  { /* DO NOTHING */
+  }
+  void finishLifetime(const ir::OperandIndex &)
+  { /* DO NOTHING */
+  }
+
+  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                   tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                   std::shared_ptr<tflite::gpu::cl::Environment> environment,
+                   tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+  {
+    tflite::gpu::ValueId max_id = 0;
+    auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
+    const auto shape = info.shape();
+
+    auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type);
+    _tensors[ind] = tensor;
+    tflite::gpu::BHWC t_shape;
+    switch (shape.rank())
+    {
+      case 1:
+        // B layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+        break;
+      case 2:
+        // BC layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+        break;
+      case 3:
+        // BWC layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+        break;
+      case 4:
+        // BHWC layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+        break;
+      default:
+        break;
+    }
+
+    tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type;
+    tflite::gpu::Layout layout =
+      t_shape.b == 1 ? tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC;
+
+    tflite::gpu::ValueId id = ind.value();
+    storage_type =
+      tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout);
+    auto dummy = std::make_shared<InferenceContextEx::DummyTensor>();
+    dummy->shape = t_shape;
+    dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout};
+    tensor_reserver_.Add(id, dummy);
+
+    max_id = std::max(max_id, id);
+
+    tensor_reserver_.SetNext(max_id + 1);
+  }
+
+  ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; }
+
+  InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; }
+
+private:
+  ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors;
+  InferenceContextEx::TensorReserverEx tensor_reserver_;
+  tflite::gpu::cl::CLContext *_context;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/ParentInfo.h b/runtime/onert/backend/gpu_cl/ParentInfo.h
deleted file mode 100644 (file)
index d7cb2d4..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_PARENT_INFO_H__
-#define __ONERT_BACKEND_PARENT_INFO_H__
-
-#include <ir/Index.h>
-#include <ir/Coordinates.h>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-/**
- * @brief      Struct to represent parent operand in child operand
- */
-struct ParentInfo
-{
-  ir::OperandIndex parent;
-  ir::Layout frontend_layout;
-  ir::Coordinates coordinates;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.cc b/runtime/onert/backend/gpu_cl/TensorBuilder.cc
new file mode 100644 (file)
index 0000000..e717334
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <queue>
+
+#include "TensorBuilder.h"
+
+#include "TensorManager.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include <ir/Operands.h>
+#include <util/Utils.h>
+
+#include <cassert>
+#include <stack>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+using UsesType = cl_common::UsesType;
+
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
+                             tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                             const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+  : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
+                                                                               environment}
+{
+  assert(_tensor_mgr);
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout backend_layout, TensorType type)
+{
+  assert(_tensor_mgr->constTensors().size() == 0);
+  assert(_tensor_mgr->nonconstTensors().size() == 0);
+
+  _uses_count_map[ind] = _operands.at(ind).getUses().size();
+
+  _tensor_info_map.emplace(ind, info);
+  _tensor_type_map.emplace(ind, type);
+
+  _tensor_layout_map.insert({ind, backend_layout});
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  _lifetime_seq.emplace_back(UsesType::FIRST, ind);
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+  _lifetime_seq.emplace_back(UsesType::LAST, ind);
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { buildTensors(); }
+
+void TensorBuilder::allocate(void)
+{
+  auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
+
+  for (auto &entry : lifetime_map)
+  {
+    auto &use = entry.second;
+    auto use_type = use.first;
+    auto use_index = use.second;
+    assert(use_index.valid());
+    if (use_type == UsesType::FIRST)
+      _tensor_mgr->startLifetime(use_index);
+    else
+      _tensor_mgr->finishLifetime(use_index);
+  }
+
+  _tensor_mgr->allocateConsts();
+
+  // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses
+  //      After refactoring BackendContext we can uncomment this
+  // assert(_tensor_info_map.size() ==
+  //       _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
+  //       _parent_map.size());
+  _tensor_mgr->allocateNonconsts();
+}
+
+void TensorBuilder::postFunctionPrepare(void) { _tensor_mgr->tryDeallocConstants(); }
+
+void TensorBuilder::buildTensors(void)
+{
+  assert(_tensor_mgr->constTensors().size() == 0);
+  assert(_tensor_mgr->nonconstTensors().size() == 0);
+  // Normal tensors
+  for (auto &entry : _tensor_info_map)
+  {
+    auto ind = entry.first;
+    if (_parent_map.count(ind) > 0)
+      continue;
+    auto type = _tensor_type_map.at(ind);
+    const auto &info = entry.second;
+    _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_,
+                             type);
+  }
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
index d55358191aa4b9585e5b25b503e04466235c9f40..2a5cb8b5e6c42c18bb343f586df1400de2ee9872 100644 (file)
 #ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
 #define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
 
-#include <backend/basic/TensorBuilder.h>
-#include "operand/ICLTensor.h"
-#include "operand/CLTensor.h"
-#include "ClTensorBuilder.h"
+#include "TensorManager.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/Operands.h>
+#include <ir/OperandIndexSequence.h>
 
 namespace onert
 {
@@ -28,8 +31,76 @@ namespace backend
 {
 namespace gpu_cl
 {
+class TensorBuilder
+{
+public:
+  TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
+                tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                const std::shared_ptr<tflite::gpu::cl::Environment> &environment);
+
+  /**
+   * @brief     Register tensor information to allocate on ACL-CL backend
+   * @param[in] ind    Operand index
+   * @param[in] info   Tensor information
+   * @param[in] layout Tensor data layout
+   */
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout, TensorType type);
+
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
+
+  bool isRegistered(const ir::OperandIndex &) const;
+
+  void prepare();
+  void allocate();
+  void postFunctionPrepare();
+
+  TensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); }
+
+  void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
+  {
+    assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
+                                                                : true);
+    _uses_count_map[index] = num_uses;
+  }
+
+  void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
+  {
+    _parent_map = std::move(parent_map);
+  }
+
+  bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
+
+  /**
+   * @brief     Check child tensor is allocated as subtensor of parent tensor
+   * @param[in] parent  Index of parent
+   * @param[in] child   Index of child
+   * @return    @c true if child is allocated as subtensor of parent, otherwise @c false
+   */
+  bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
+
+private:
+  void buildTensors(void);
+  ir::OperandIndex findRootParent(ir::OperandIndex index);
+
+private:
+  const ir::Operands &_operands;
+  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+  ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
+  ir::OperandIndexMap<TensorType> _tensor_type_map;
+  ir::OperandIndexMap<size_t> _uses_count_map;
+
+  std::unique_ptr<TensorManager> _tensor_mgr;
+  tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+
+  // for linear executor
+  cl_common::LifetimeSeq _lifetime_seq;
 
-using TensorBuilder = ClTensorBuilder<operand::ICLTensor, operand::CLTensor>;
+  // Extra info for concat elimination
+  ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
+};
 
 } // namespace gpu_cl
 } // namespace backend
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
new file mode 100644 (file)
index 0000000..7290ff5
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
+
+#include "absl/status/status.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+enum TensorType
+{
+  TENSOR_TYPE_VALID = 0,
+  TENSOR_TYPE_INPUT = 1,
+  TENSOR_TYPE_OUTPUT = 2,
+  TENSOR_TYPE_DELETE = 3
+};
+
+absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis);
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorManager.cc b/runtime/onert/backend/gpu_cl/TensorManager.cc
new file mode 100644 (file)
index 0000000..9fe0605
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TensorManager.h"
+
+#include <util/logging.h>
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+TensorManager::TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr)
+  : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr}
+{
+  // DO NOTHING
+}
+
+void TensorManager::allocateConsts(void) { _const_mgr->allocate(); }
+
+void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); }
+
+void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); }
+
+void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); }
+
+void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                                std::shared_ptr<tflite::gpu::cl::Environment> environment,
+                                tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+{
+  assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end());
+
+  if (info.isConstant())
+  {
+    _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+    _ind_to_mgr.insert({ind, *_const_mgr});
+  }
+  else
+  {
+    _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type);
+    _ind_to_mgr.insert({ind, *_nonconst_mgr});
+  }
+}
+
+void TensorManager::startLifetime(const ir::OperandIndex &ind)
+{
+  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+  _ind_to_mgr.at(ind).startLifetime(ind);
+}
+
+void TensorManager::finishLifetime(const ir::OperandIndex &ind)
+{
+  assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end());
+  _ind_to_mgr.at(ind).finishLifetime(ind);
+}
+
+std::shared_ptr<operand::ICLTensor> TensorManager::at(const ir::OperandIndex &ind)
+{
+  if (_ind_to_mgr.find(ind) == _ind_to_mgr.end())
+    return nullptr;
+
+  auto &tensors = _ind_to_mgr.at(ind).tensors();
+  if (tensors.find(ind) != tensors.end())
+  {
+    return tensors.at(ind);
+  }
+
+  return nullptr;
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::constTensors(void)
+{
+  return _const_mgr->tensors();
+}
+
+ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconstTensors(void)
+{
+  return _nonconst_mgr->tensors();
+}
+
+std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind)
+{
+  if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value()))
+  {
+    return _nonconst_mgr->tensorReservers().Get(ind.value());
+  }
+  else if (_const_mgr->tensorReservers().HaveTensor(ind.value()))
+  {
+    return _const_mgr->tensorReservers().Get(ind.value());
+  }
+  return nullptr;
+}
+
+InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void)
+{
+  return _const_mgr->tensorReservers();
+}
+
+InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void)
+{
+  return _nonconst_mgr->tensorReservers();
+}
+
+void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
+{
+  for (auto it : _nonconst_mgr->tensors())
+    fn(it.first);
+
+  for (auto it : _const_mgr->tensors())
+    fn(it.first);
+}
+
+void TensorManager::tryDeallocConstants(void)
+{
+  // NYI
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
index 111b5f8a768a7544c64a177cf7dc00eee9528ec4..52abc579aef247cc0d55baa3bb5e086205b7d4df 100644 (file)
  * limitations under the License.
  */
 
-#ifndef __ONERT_BACKEND_CL_TENSOR_MANAGER_H__
-#define __ONERT_BACKEND_CL_TENSOR_MANAGER_H__
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
 
-#include "ClMemoryManager.h"
-#include "ClTensorManager.h"
-#include "open_cl/ClContext.h"
-#include "operand/CLTensor.h"
-#include "operand/ICLTensor.h"
-#include "util/logging.h"
+#include "MemoryManager.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+#include "ir/OperandInfo.h"
+#include "ir/OperandIndexMap.h"
 
 namespace onert
 {
@@ -31,13 +32,50 @@ namespace backend
 namespace gpu_cl
 {
 
-using MemoryManager = ClMemoryManager<operand::ICLTensor, operand::CLTensor>;
+class TensorManager
+{
+public:
+  TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr);
+
+  virtual ~TensorManager() = default;
+
+  void allocateConsts(void);
+  void allocateNonconsts(void);
+  void deallocateConsts(void);
+  void deallocateNonconsts(void);
+
+  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                   tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                   std::shared_ptr<tflite::gpu::cl::Environment> environment,
+                   tflite::gpu::cl::DeviceInfo &device_info, TensorType type);
+
+  std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind);
+
+  void startLifetime(const ir::OperandIndex &ind);
+  void finishLifetime(const ir::OperandIndex &ind);
+
+  std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind);
+  std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind);
+
+  InferenceContextEx::TensorReserverEx &constTensorReservers(void);
+  InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void);
+
+  ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void);
+  ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void);
+
+  void iterate(const std::function<void(const ir::OperandIndex &)> &fn);
+
+  void tryDeallocConstants(void);
 
-using TensorManager = ClTensorManager<operand::ICLTensor, operand::CLTensor>;
+private:
+  std::unique_ptr<MemoryManager> _const_mgr;
+  std::unique_ptr<MemoryManager> _nonconst_mgr;
+  ir::OperandIndexMap<MemoryManager &> _ind_to_mgr;
+};
 
-inline TensorManager *createTensorManager(CLContext *context)
+inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context)
 {
-  VERBOSE(createTensorManager) << "ClTensorManager" << std::endl;
+  VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl;
   return new TensorManager(new MemoryManager(context), new MemoryManager(context));
 }
 
@@ -45,4 +83,4 @@ inline TensorManager *createTensorManager(CLContext *context)
 } // namespace backend
 } // namespace onert
 
-#endif // __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__
diff --git a/runtime/onert/backend/gpu_cl/TensorRegistry.h b/runtime/onert/backend/gpu_cl/TensorRegistry.h
new file mode 100644 (file)
index 0000000..6f17aff
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
+#define __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
+
+#include "TensorManager.h"
+
+#include "backend/ITensorRegistry.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+/**
+ * @brief Tensor registry class for gpu-cl backends
+ *
+ * This is implemented as a wrapper of TensorManager.
+ */
+class TensorRegistry : public ITensorRegistry
+{
+public:
+  TensorRegistry(TensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {}
+
+  ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); }
+
+  ITensor *getNativeITensor(const ir::OperandIndex &ind) override { return getITensor(ind); }
+
+  auto getClTensor(const ir::OperandIndex &ind) { return _tensor_mgr->at(ind).get(); }
+
+  auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); }
+
+private:
+  TensorManager *_tensor_mgr;
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__
diff --git a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h
new file mode 100644 (file)
index 0000000..f673879
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
+#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
+
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "absl/strings/str_cat.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class InferenceContextEx : public tflite::gpu::cl::InferenceContext
+{
+public:
+  struct DummyTensor
+  {
+    tflite::gpu::BHWC shape;
+    tflite::gpu::cl::TensorDescriptor descriptor;
+
+    bool operator==(const DummyTensor &b) const
+    {
+      return shape == b.shape && descriptor == b.descriptor;
+    }
+  };
+
+  class TensorReserverEx
+  {
+  public:
+    tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy)
+    {
+      reservations_[next_] = dummy;
+      return next_++;
+    }
+    void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy)
+    {
+      reservations_[id] = dummy;
+    }
+    void SetNext(tflite::gpu::ValueId id) { next_ = id; }
+    bool HaveTensor(tflite::gpu::ValueId id)
+    {
+      return reservations_.find(id) != reservations_.end();
+    }
+    std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; }
+
+    std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
+    GetTensorDescs() const
+    {
+      std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result;
+      for (auto &v : reservations_)
+      {
+        tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor;
+        desc.shape.b = v.second->shape.b;
+        desc.shape.h = v.second->shape.h;
+        desc.shape.w = v.second->shape.w;
+        desc.shape.d = 1;
+        desc.shape.c = v.second->shape.c;
+        result.push_back({v.first, desc});
+      }
+      return result;
+    }
+
+    void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>>
+               &tensors)
+    {
+      for (auto &v : tensors)
+      {
+        auto dummy = std::make_shared<DummyTensor>();
+        dummy->descriptor = v.second;
+        dummy->shape.b = v.second.shape.b;
+        dummy->shape.h = v.second.shape.h;
+        dummy->shape.w = v.second.shape.w;
+        dummy->shape.c = v.second.shape.c;
+        Add(v.first, dummy);
+      }
+    }
+
+  private:
+    // absl::flat_hash_map<ValueId, DummyTensor> reservations_;
+    std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_;
+    tflite::gpu::ValueId next_ = 0;
+  };
+};
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h b/runtime/onert/backend/gpu_cl/open_cl/AccessType.h
deleted file mode 100644 (file)
index 81efd66..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-enum class AccessType
-{
-  UNKNOWN,
-  READ,
-  WRITE,
-  READ_WRITE,
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.cc b/runtime/onert/backend/gpu_cl/open_cl/Api.cc
deleted file mode 100644 (file)
index 10bf87c..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Api.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-struct ObjectTypeGetter
-{
-  ObjectType operator()(absl::monostate) const { return ObjectType::UNKNOWN; }
-  ObjectType operator()(OpenClBuffer) const { return ObjectType::OPENCL_BUFFER; }
-  ObjectType operator()(OpenClTexture) const { return ObjectType::OPENCL_TEXTURE; }
-  ObjectType operator()(CpuMemory) const { return ObjectType::CPU_MEMORY; }
-};
-
-struct ObjectValidityChecker
-{
-  bool operator()(absl::monostate) const { return false; }
-  bool operator()(OpenClBuffer obj) const { return obj.memobj; }
-  bool operator()(OpenClTexture obj) const { return obj.memobj; }
-  bool operator()(CpuMemory obj) const
-  {
-    return obj.data != nullptr && obj.size_bytes > 0 &&
-           (data_type == DataType::UNKNOWN || obj.size_bytes % SizeOf(data_type) == 0);
-  }
-  DataType data_type;
-};
-
-} // namespace
-
-bool IsValid(const ObjectDef &def)
-{
-  return def.data_type != DataType::UNKNOWN && def.data_layout != DataLayout::UNKNOWN &&
-         def.object_type != ObjectType::UNKNOWN;
-}
-
-ObjectType GetType(const TensorObject &object) { return absl::visit(ObjectTypeGetter{}, object); }
-
-bool IsValid(const TensorObjectDef &def) { return IsValid(def.object_def); }
-
-bool IsValid(const TensorObjectDef &def, const TensorObject &object)
-{
-  return GetType(object) == def.object_def.object_type &&
-         absl::visit(ObjectValidityChecker{def.object_def.data_type}, object);
-}
-
-bool IsObjectPresent(ObjectType type, const TensorObject &obj)
-{
-  switch (type)
-  {
-    case ObjectType::CPU_MEMORY:
-      return absl::holds_alternative<CpuMemory>(obj);
-    case ObjectType::OPENCL_BUFFER:
-      return absl::holds_alternative<OpenClBuffer>(obj);
-    case ObjectType::OPENCL_TEXTURE:
-      return absl::holds_alternative<OpenClTexture>(obj);
-    case ObjectType::UNKNOWN:
-      return false;
-  }
-  return false;
-}
-
-uint32_t NumElements(const TensorObjectDef &def)
-{
-  const auto &d = def.dimensions;
-  switch (def.object_def.data_layout)
-  {
-    case DataLayout::BHWC:
-      return d.product();
-    case DataLayout::HWDC4:
-    case DataLayout::HDWC4:
-    case DataLayout::DHWC4:
-      return d.b * d.h * d.w * AlignByN(d.c, 4);
-    case DataLayout::UNKNOWN:
-      return 0;
-  }
-  return 0;
-}
-
-int GetPosition(const InferenceOptions &options, InferencePriority p)
-{
-  if (options.priority1 == p)
-    return 1;
-  if (options.priority2 == p)
-    return 2;
-  if (options.priority3 == p)
-    return 3;
-  return 4; // least important
-}
-
-PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1,
-                                         InferencePriority p2)
-{
-  int p1_position = GetPosition(options, p1);
-  int p2_position = GetPosition(options, p2);
-  if (p1_position == p2_position)
-    return PriorityImportance::UNKNOWN;
-  return p1_position < p2_position ? PriorityImportance::HIGHER : PriorityImportance::LOWER;
-}
-
-bool IsValid(const InferenceOptions &options)
-{
-  if (options.usage == InferenceUsage::UNKNOWN)
-  {
-    return false;
-  }
-  if (options.priority1 == InferencePriority::UNKNOWN ||
-      options.priority2 == InferencePriority::UNKNOWN ||
-      options.priority3 == InferencePriority::UNKNOWN)
-  {
-    return false;
-  }
-  if (options.priority1 == InferencePriority::AUTO)
-  {
-    return false;
-  }
-  if (options.priority2 == InferencePriority::AUTO && options.priority3 != InferencePriority::AUTO)
-  {
-    return false;
-  }
-  if (options.priority1 == options.priority2 || options.priority1 == options.priority3)
-  {
-    return false;
-  }
-  if (options.priority2 == options.priority3 && options.priority2 != InferencePriority::AUTO)
-  {
-    return false;
-  }
-  return true;
-}
-
-// Implementation note: this resolution logic is shared between GL and CL
-// backends, but they might have own logic. Thus, the function is defined
-// here just for code re-use purposes.
-void ResolveAutoPriority(InferenceOptions *options)
-{
-  // priority1 can not be AUTO as it would make options invalid.
-  if (options->priority2 == InferencePriority::AUTO)
-  {
-    switch (options->priority1)
-    {
-      case InferencePriority::MIN_LATENCY:
-        options->priority2 = InferencePriority::MIN_MEMORY_USAGE;
-        options->priority3 = InferencePriority::MAX_PRECISION;
-        return;
-      case InferencePriority::MIN_MEMORY_USAGE:
-        options->priority2 = InferencePriority::MAX_PRECISION;
-        options->priority3 = InferencePriority::MIN_LATENCY;
-        return;
-      case InferencePriority::MAX_PRECISION:
-        options->priority2 = InferencePriority::MIN_LATENCY;
-        options->priority3 = InferencePriority::MIN_MEMORY_USAGE;
-        return;
-      case InferencePriority::UNKNOWN:
-      case InferencePriority::AUTO:
-        // Invalid and unreachable option.
-        return;
-    }
-  }
-
-  if (options->priority3 == InferencePriority::AUTO)
-  {
-    // Simply add missing priority
-    if (GetPosition(*options, InferencePriority::MIN_LATENCY) == 4)
-    {
-      options->priority3 = InferencePriority::MIN_LATENCY;
-    }
-    else if (GetPosition(*options, InferencePriority::MAX_PRECISION) == 4)
-    {
-      options->priority3 = InferencePriority::MAX_PRECISION;
-    }
-    else if (GetPosition(*options, InferencePriority::MIN_MEMORY_USAGE) == 4)
-    {
-      options->priority3 = InferencePriority::MIN_MEMORY_USAGE;
-    }
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.h b/runtime/onert/backend/gpu_cl/open_cl/Api.h
deleted file mode 100644 (file)
index 35be3d9..0000000
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_API_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_API_H__
-
-// Usage example:
-//
-//   // Builder is created from a model using GPU-specific parameters.
-//   std::unique_ptr<InferenceBuilder> builder = ...;
-//
-//   // input data is coming from a texture
-//   // output data goes to CPU
-//   builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::PHWC4,
-//                                  ObjectType::OPENGL_TEXTURE, true});
-//   builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC,
-//                                  ObjectType::CPU_MEMORY, false});
-//   std::unique_ptr<InferenceRunner> runner;
-//   RETURN_IF_ERROR(builder->Build(&runner));  // may take significant time.
-//   RETURN_IF_ERROR(
-//       runner->SetInputObject(0, OpenGlTexture{texture_ud, texture_format}));
-//   RETURN_IF_ERROR(runner->Run());
-
-#include <cstdint>
-#include <memory>
-#include <vector>
-
-#include "absl/types/span.h"
-#include "absl/types/variant.h"
-#include "DataType.h"
-#include "Status.h"
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// Common abbreviations:
-//   B  - batch
-//   H  - height
-//   W  - width
-//   C  - channels
-//   D  - depth := DivideRoundUp(C, 4)
-//   C4 - is the constant = 4.
-enum class DataLayout
-{
-  UNKNOWN,
-  BHWC,
-  DHWC4,
-  HWDC4,
-  HDWC4,
-};
-
-enum class ObjectType
-{
-  UNKNOWN,
-  CPU_MEMORY,
-  OPENCL_TEXTURE,
-  OPENCL_BUFFER,
-};
-
-struct OpenClBuffer
-{
-  OpenClBuffer() = default;
-  explicit OpenClBuffer(cl_mem new_memobj) : memobj(new_memobj) {}
-
-  cl_mem memobj = nullptr;
-};
-
-struct OpenClTexture
-{
-  OpenClTexture() = default;
-  explicit OpenClTexture(cl_mem new_memobj) : memobj(new_memobj) {}
-
-  cl_mem memobj = nullptr;
-  // TODO(akulik): should it specify texture format?
-};
-
-struct CpuMemory
-{
-  CpuMemory() = default;
-  CpuMemory(void *new_data, size_t new_size_bytes) : data(new_data), size_bytes(new_size_bytes) {}
-
-  void *data = nullptr;
-  size_t size_bytes = 0;
-};
-
-template <typename T> inline CpuMemory MakeCpuMemory(absl::Span<T> t)
-{
-  CpuMemory m;
-  m.data = t.data();
-  m.size_bytes = t.size() * sizeof(T);
-  return m;
-}
-
-template <typename T> inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t)
-{
-  CpuMemory m;
-  m.data = const_cast<T *>(t.data());
-  m.size_bytes = t.size() * sizeof(T);
-  return m;
-}
-
-// Defines object representation.
-struct ObjectDef
-{
-  DataType data_type = DataType::UNKNOWN;
-  DataLayout data_layout = DataLayout::UNKNOWN;
-  ObjectType object_type = ObjectType::UNKNOWN;
-
-  // If true, then object is managed externally and needs to be provided to
-  // InferenceRunner by a user before running inference.
-  //
-  // User-provided objects will not be re-used internally for any purpose to
-  // lower overall memory usage.
-  bool user_provided = false;
-
-  bool operator==(const ObjectDef &other) const
-  {
-    return data_type == other.data_type && data_layout == other.data_layout &&
-           object_type == other.object_type && user_provided == other.user_provided;
-  }
-};
-
-bool IsValid(const ObjectDef &def);
-
-struct Dimensions
-{
-  Dimensions() : b(1), h(1), w(1), c(1) {}
-
-  Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels)
-    : b(batch), h(height), w(width), c(channels)
-  {
-  }
-
-  int32_t d() const { return DivideRoundUp(c, 4); }
-
-  int32_t product() const { return b * h * w * c; }
-
-  bool operator==(const Dimensions &other) const
-  {
-    return b == other.b && h == other.h && w == other.w && c == other.c;
-  }
-
-  int32_t b;
-  int32_t h;
-  int32_t w;
-  int32_t c;
-};
-
-// Connects tensor shape with corresponding object definition.
-struct TensorObjectDef
-{
-  // Dimensions semantic is defined by corresponding DataLayout.
-  Dimensions dimensions;
-  ObjectDef object_def;
-
-  bool operator==(const TensorObjectDef &other) const
-  {
-    return dimensions == other.dimensions && object_def == other.object_def;
-  }
-};
-
-// @return true if tensor object def is defined.
-bool IsValid(const TensorObjectDef &def);
-
-// @return the number of elements in a tensor object.
-uint32_t NumElements(const TensorObjectDef &def);
-
-using TensorObject = absl::variant<absl::monostate, CpuMemory, OpenClBuffer, OpenClTexture>;
-
-// @return true if object is set and corresponding values are defined.
-bool IsValid(const TensorObjectDef &def, const TensorObject &object);
-
-ObjectType GetType(const TensorObject &object);
-
-// @return true if corresponding object is set for the given type
-bool IsObjectPresent(ObjectType type, const TensorObject &obj);
-
-class InferenceRunner;
-
-// Allows to inspect and change input and output definitions before a graph is
-// prepared for the inference.
-class InferenceBuilder
-{
-public:
-  virtual ~InferenceBuilder() {}
-
-  // Returns inference graph inputs and outputs definitions.
-  virtual std::vector<TensorObjectDef> inputs() const = 0;
-  virtual std::vector<TensorObjectDef> outputs() const = 0;
-
-  // Sets new shape for the input if underlying implementation and graph
-  // structure allows dynamic tensors.
-  virtual absl::Status SetInputShape(int index, const Dimensions &dimensions) = 0;
-
-  // Updates object definitions for the given index. Implementation may allow
-  // to use different layouts and/or data type conversions between objects
-  // defined in a graph and given objects, for example:
-  //   input '0' is DataType::FLOAT32, DataLayout::BHWC.
-  //   A user, however, has an input in DataType::FLOAT16, DataLayout::PHWC4.
-  //   An implementation may allow this transformation to happen automatically
-  //   under the hood.
-  virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0;
-  virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0;
-  virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def)
-  {
-    auto input_defs = inputs();
-    for (size_t i = 0; i < input_defs.size(); ++i)
-    {
-      RETURN_IF_ERROR(SetInputObjectDef(i, def));
-    }
-    return absl::OkStatus();
-  }
-  virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def)
-  {
-    auto output_defs = outputs();
-    for (size_t i = 0; i < output_defs.size(); ++i)
-    {
-      RETURN_IF_ERROR(SetOutputObjectDef(i, def));
-    }
-    return absl::OkStatus();
-  }
-
-  // Creates new instance of the inference runner. InferenceBuilder stays valid
-  // and could be used to create another inference runner if needed.
-  //
-  // This method may take significant time to prepare new inference runner. For
-  // example, it may require to compile OpenGL shaders.
-  virtual absl::Status Build(std::unique_ptr<InferenceRunner> *runner) = 0;
-};
-
-// Runs prepared inference. Every object marked as external needs to be set
-// prior calling Run method.
-class InferenceRunner
-{
-public:
-  virtual ~InferenceRunner() {}
-
-  // Returns inference graph inputs and outputs definitions.
-  virtual std::vector<TensorObjectDef> inputs() const = 0;
-  virtual std::vector<TensorObjectDef> outputs() const = 0;
-
-  // Getters provide access to underlying objects for the given index.
-  // Setters allow to set or change external object for the given index. Note,
-  // object need to match object definition set before in InferenceBuilder.
-
-  virtual absl::Status GetInputObject(int index, TensorObject *object) = 0;
-  virtual absl::Status GetOutputObject(int index, TensorObject *object) = 0;
-  virtual absl::Status SetInputObject(int index, TensorObject object) = 0;
-  virtual absl::Status SetOutputObject(int index, TensorObject object) = 0;
-
-  virtual absl::Status Run() = 0;
-};
-
-// Encapsulated compilation/runtime tradeoffs.
-enum class InferenceUsage
-{
-  UNKNOWN,
-
-  // InferenceRunner will be used only once. Therefore, it is important to
-  // minimize bootstrap time as well.
-  FAST_SINGLE_ANSWER,
-
-  // Prefer maximizing the throughput. Same inference runner will be used
-  // repeatedly on different inputs.
-  SUSTAINED_SPEED,
-};
-
-// Defines aspects to control while instantiating a runner.
-enum class InferencePriority
-{
-  UNKNOWN,
-
-  AUTO,
-
-  MIN_LATENCY,
-
-  MAX_PRECISION,
-
-  MIN_MEMORY_USAGE,
-};
-
-struct InferenceOptions
-{
-  InferenceUsage usage = InferenceUsage::SUSTAINED_SPEED;
-
-  // Ordered priorities provide better understanding of desired semantics,
-  // where priority(n) is more important than priority(n+1).
-  // AUTO priority is needed when a single priority is the most important
-  // factor. For example, priority1 = InferencePriority::MIN_LATENCY and leaving
-  // everything else to AUTO would result in configuration that achieves maximum
-  // performance.
-  //
-  // AUTO priority can only be used when higher priorities are fully specified.
-  // For example:
-  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
-  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
-  //            priority3 = AUTO
-  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
-  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
-  //            priority3 = MAX_PRECISION
-  // Invalid priorities will result in error.
-  InferencePriority priority1 = InferencePriority::MAX_PRECISION;
-
-  InferencePriority priority2 = InferencePriority::AUTO;
-
-  InferencePriority priority3 = InferencePriority::AUTO;
-};
-
-// Returns a position number for the priority. If priority is missing,
-// then it it would return 'max num priorities + 1'.
-int GetPosition(const InferenceOptions &options, InferencePriority p);
-
-// Return true if options are valid.
-bool IsValid(const InferenceOptions &options);
-
-// Resolves AUTO priorities and specifies them explicitly.
-// Note, no-one should assume that these mappings will not change.
-// Technically this function is declared here for code re-use purposes and
-// by no means it should be treated as canonical way to resolve AUTO.
-void ResolveAutoPriority(InferenceOptions *options);
-
-enum class PriorityImportance
-{
-  UNKNOWN,
-  HIGHER,
-  LOWER,
-};
-
-// If both p1 and p2 are not present in options, return UNKNOWN
-// If p1 is present, but p2 is not, return HIGHER
-// If p2 is present, but p1 is not, return LOWER
-// If both are present, and p1 is more important, return HIGHER, otherwise,
-// LOWER.
-PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1,
-                                         InferencePriority p2);
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_API_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc b/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc
deleted file mode 100644 (file)
index a7f86bf..0000000
+++ /dev/null
@@ -1,926 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Arguments.h"
-
-#include "absl/strings/ascii.h"
-#include "absl/strings/str_cat.h"
-#include "absl/strings/str_replace.h"
-#include "absl/strings/str_split.h"
-#include "absl/strings/substitute.h"
-
-#include "AccessType.h"
-#include "TensorType.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-bool IsWordSymbol(char symbol) { return absl::ascii_isalnum(symbol) || symbol == '_'; }
-
-std::string GetNextWord(const std::string &code, size_t first_position)
-{
-  size_t pos = first_position;
-  char t = code[pos];
-  while (IsWordSymbol(t))
-  {
-    pos++;
-    t = code[pos];
-  }
-  return code.substr(first_position, pos - first_position);
-}
-
-size_t FindEnclosingBracket(const std::string &text, size_t first_pos, char bracket)
-{
-  const std::map<char, char> brackets = {
-    {'(', ')'},
-    {'{', '}'},
-    {'[', ']'},
-    {'<', '>'},
-  };
-  char b_open = bracket;
-  auto it = brackets.find(b_open);
-  if (it == brackets.end())
-  {
-    return -1;
-  }
-  char b_close = it->second;
-  size_t pos = first_pos;
-  int opened = 1;
-  int closed = 0;
-  while (opened != closed && pos < text.size())
-  {
-    if (text[pos] == b_open)
-    {
-      opened++;
-    }
-    else if (text[pos] == b_close)
-    {
-      closed++;
-    }
-    pos++;
-  }
-  if (opened == closed)
-  {
-    return pos;
-  }
-  else
-  {
-    return -1;
-  }
-}
-
-absl::Status ParseArgsInsideBrackets(const std::string &text, size_t open_bracket_pos,
-                                     size_t *close_bracket_pos, std::vector<std::string> *args)
-{
-  *close_bracket_pos = FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]);
-  if (*close_bracket_pos == static_cast<size_t>(-1))
-  {
-    return absl::NotFoundError("Not found enclosing bracket");
-  }
-  std::string str_args =
-    text.substr(open_bracket_pos + 1, *close_bracket_pos - open_bracket_pos - 2);
-  std::vector<absl::string_view> words = absl::StrSplit(str_args, ',');
-  args->reserve(words.size());
-  for (const auto &word : words)
-  {
-    absl::string_view arg = absl::StripAsciiWhitespace(word);
-    if (!arg.empty())
-    {
-      args->push_back(std::string(arg));
-    }
-  }
-  return absl::OkStatus();
-}
-
-void ReplaceAllWords(const std::string &old_word, const std::string &new_word, std::string *str)
-{
-  size_t position = str->find(old_word);
-  while (position != std::string::npos)
-  {
-    char prev = position == 0 ? '.' : (*str)[position - 1];
-    char next = position + old_word.size() < str->size() ? (*str)[position + old_word.size()] : '.';
-    if (IsWordSymbol(prev) || IsWordSymbol(next))
-    {
-      position = str->find(old_word, position + 1);
-      continue;
-    }
-    str->replace(position, old_word.size(), new_word);
-    position = str->find(old_word, position + new_word.size());
-  }
-}
-
-std::string RenameArg(const std::vector<std::string> &object_names, const std::string &postfix,
-                      const std::string &arg_name)
-{
-  for (const auto &object_name : object_names)
-  {
-    if (absl::StartsWith(arg_name, object_name) && arg_name.size() > object_name.size() &&
-        arg_name[object_name.size()] == '_')
-    {
-      return object_name + postfix +
-             arg_name.substr(object_name.size(), arg_name.size() - object_name.size());
-    }
-  }
-  return arg_name + postfix;
-}
-
-void AppendArgument(const std::string &arg, std::string *args)
-{
-  if (!args->empty())
-  {
-    absl::StrAppend(args, ",\n  ");
-  }
-  absl::StrAppend(args, arg);
-}
-
-std::string GetImageModifier(AccessType access)
-{
-  switch (access)
-  {
-    case AccessType::READ:
-      return "__read_only";
-    case AccessType::WRITE:
-      return "__write_only";
-    case AccessType::READ_WRITE:
-      return "__read_write";
-    default:
-      throw std::runtime_error("Invalid AccessType");
-  }
-}
-
-std::string GetDefaultSamplers(const DeviceInfo &device_info)
-{
-  std::string result;
-  result += "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
-            "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
-  if (device_info.IsAdreno3xx())
-  {
-    // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
-    // we can observe huge register overhead when compared to other modes.
-
-    // While using CLK_ADDRESS_NONE with out-of-range image coordinates is
-    // undefined in the OpenCL specification, we have observed that
-    // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
-    // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
-    // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
-    // 3xx.
-    result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
-              "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
-  }
-  else
-  {
-    result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
-              "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
-  }
-
-  return result;
-}
-
-} // namespace
-
-// Static
-constexpr char Arguments::kArgsPrefix[];
-
-Arguments::Arguments(Arguments &&args)
-  : int_values_(std::move(args.int_values_)),
-    shared_int4s_data_(std::move(args.shared_int4s_data_)),
-    float_values_(std::move(args.float_values_)),
-    shared_float4s_data_(std::move(args.shared_float4s_data_)), buffers_(std::move(args.buffers_)),
-    images2d_(std::move(args.images2d_)), image2d_arrays_(std::move(args.image2d_arrays_)),
-    images3d_(std::move(args.images3d_)), image_buffers_(std::move(args.image_buffers_)),
-    custom_memories_(std::move(args.custom_memories_)), object_refs_(std::move(args.object_refs_)),
-    objects_(std::move(args.objects_))
-{
-}
-Arguments &Arguments::operator=(Arguments &&args)
-{
-  if (this != &args)
-  {
-    int_values_ = std::move(args.int_values_);
-    shared_int4s_data_ = std::move(args.shared_int4s_data_);
-    float_values_ = std::move(args.float_values_);
-    shared_float4s_data_ = std::move(args.shared_float4s_data_);
-    buffers_ = std::move(args.buffers_);
-    images2d_ = std::move(args.images2d_);
-    image2d_arrays_ = std::move(args.image2d_arrays_);
-    images3d_ = std::move(args.images3d_);
-    image_buffers_ = std::move(args.image_buffers_);
-    custom_memories_ = std::move(args.custom_memories_);
-    object_refs_ = std::move(args.object_refs_);
-    objects_ = std::move(args.objects_);
-  }
-  return *this;
-}
-
-void Arguments::AddFloat(const std::string &name, float value)
-{
-  float_values_[name].value = value;
-}
-void Arguments::AddInt(const std::string &name, int value) { int_values_[name].value = value; }
-void Arguments::AddBuffer(const std::string &name, const GPUBufferDescriptor &desc)
-{
-  buffers_[name] = desc;
-}
-void Arguments::AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc)
-{
-  images2d_[name] = desc;
-}
-
-void Arguments::AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc)
-{
-  image2d_arrays_[name] = desc;
-}
-
-void Arguments::AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc)
-{
-  images3d_[name] = desc;
-}
-
-void Arguments::AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc)
-{
-  image_buffers_[name] = desc;
-}
-
-void Arguments::AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc)
-{
-  custom_memories_[name] = desc;
-}
-
-void Arguments::AddObjectRef(const std::string &name, AccessType access_type,
-                             GPUObjectDescriptorPtr &&descriptor_ptr)
-{
-  descriptor_ptr->SetAccess(access_type);
-  object_refs_[name] = {std::move(descriptor_ptr)};
-}
-
-void Arguments::AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr)
-{
-  descriptor_ptr->SetAccess(AccessType::READ);
-  objects_[name] = {nullptr, std::move(descriptor_ptr)};
-}
-
-void Arguments::AddGPUResources(const std::string &name, const GPUResources &resources)
-{
-  for (const auto &r : resources.ints)
-  {
-    AddInt(absl::StrCat(name, "_", r));
-  }
-  for (const auto &r : resources.floats)
-  {
-    AddFloat(absl::StrCat(name, "_", r));
-  }
-  for (const auto &r : resources.buffers)
-  {
-    AddBuffer(absl::StrCat(name, "_", r.first), r.second);
-  }
-  for (const auto &r : resources.images2d)
-  {
-    AddImage2D(absl::StrCat(name, "_", r.first), r.second);
-  }
-  for (const auto &r : resources.image2d_arrays)
-  {
-    AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
-  }
-  for (const auto &r : resources.images3d)
-  {
-    AddImage3D(absl::StrCat(name, "_", r.first), r.second);
-  }
-  for (const auto &r : resources.image_buffers)
-  {
-    AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
-  }
-  for (const auto &r : resources.custom_memories)
-  {
-    AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
-  }
-}
-
-absl::Status Arguments::SetInt(const std::string &name, int value)
-{
-  auto it = int_values_.find(name);
-  if (it == int_values_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No int argument with name - ", name));
-  }
-  it->second.value = value;
-  if (it->second.active)
-  {
-    shared_int4s_data_[it->second.offset] = value;
-  }
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetFloat(const std::string &name, float value)
-{
-  auto it = float_values_.find(name);
-  if (it == float_values_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No float argument with name - ", name));
-  }
-  it->second.value = value;
-  if (it->second.active)
-  {
-    shared_float4s_data_[it->second.offset] = value;
-  }
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImage2D(const std::string &name, cl_mem memory)
-{
-  auto it = images2d_.find(name);
-  if (it == images2d_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No image2D argument with name - ", name));
-  }
-  it->second.memory = memory;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetBuffer(const std::string &name, cl_mem memory)
-{
-  auto it = buffers_.find(name);
-  if (it == buffers_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No buffer argument with name - ", name));
-  }
-  it->second.memory = memory;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImage2DArray(const std::string &name, cl_mem memory)
-{
-  auto it = image2d_arrays_.find(name);
-  if (it == image2d_arrays_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No image2D array argument with name - ", name));
-  }
-  it->second.memory = memory;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImage3D(const std::string &name, cl_mem memory)
-{
-  auto it = images3d_.find(name);
-  if (it == images3d_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No image3D argument with name - ", name));
-  }
-  it->second.memory = memory;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetImageBuffer(const std::string &name, cl_mem memory)
-{
-  auto it = image_buffers_.find(name);
-  if (it == image_buffers_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No image buffer argument with name - ", name));
-  }
-  it->second.memory = memory;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetCustomMemory(const std::string &name, cl_mem memory)
-{
-  auto it = custom_memories_.find(name);
-  if (it == custom_memories_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No custom memory argument with name - ", name));
-  }
-  it->second.memory = memory;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::SetObjectRef(const std::string &name, const GPUObject *object)
-{
-  auto it = object_refs_.find(name);
-  if (it == object_refs_.end())
-  {
-    return absl::NotFoundError(absl::StrCat("No object ref with name - ", name));
-  }
-  GPUResourcesWithValue resources;
-  RETURN_IF_ERROR(object->GetGPUResources(it->second.descriptor.get(), &resources));
-  return SetGPUResources(name, resources);
-}
-
-absl::Status Arguments::SetGPUResources(const std::string &name,
-                                        const GPUResourcesWithValue &resources)
-{
-  for (const auto &r : resources.ints)
-  {
-    RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.floats)
-  {
-    RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.buffers)
-  {
-    RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.images2d)
-  {
-    RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.image2d_arrays)
-  {
-    RETURN_IF_ERROR(SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.images3d)
-  {
-    RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.image_buffers)
-  {
-    RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
-  }
-  for (const auto &r : resources.custom_memories)
-  {
-    RETURN_IF_ERROR(SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
-  }
-  return absl::OkStatus();
-}
-void Arguments::RenameArgs(const std::string &postfix, std::string *code) const
-{
-  size_t next_position = code->find(kArgsPrefix);
-  while (next_position != std::string::npos)
-  {
-    size_t arg_pos = next_position + strlen(kArgsPrefix);
-    std::string arg_name = GetNextWord(*code, arg_pos);
-    code->replace(arg_pos, arg_name.size(), arg_name + postfix);
-    next_position = code->find(kArgsPrefix, arg_pos + arg_name.size());
-  }
-}
-
-absl::Status Arguments::Merge(Arguments &&args, const std::string &postfix)
-{
-  std::vector<std::string> object_names;
-  object_names.reserve(args.object_refs_.size() + args.objects_.size());
-  for (auto &v : args.object_refs_)
-  {
-    object_names.push_back(v.first);
-    const std::string name = v.first + postfix;
-    if (object_refs_.find(name) != object_refs_.end())
-    {
-      return absl::InvalidArgumentError(
-        absl::StrCat("Object reference name collision. Name - ", name));
-    }
-    object_refs_[name] = {std::move(v.second.descriptor)};
-  }
-  for (auto &v : args.objects_)
-  {
-    object_names.push_back(v.first);
-    const std::string name = v.first + postfix;
-    if (objects_.find(name) != objects_.end())
-    {
-      return absl::InvalidArgumentError(absl::StrCat("Object name collision. Name - ", name));
-    }
-    objects_[name] = {std::move(v.second.obj_ptr), std::move(v.second.descriptor)};
-  }
-  for (const auto &v : args.int_values_)
-  {
-    AddInt(RenameArg(object_names, postfix, v.first), v.second.value);
-  }
-  for (const auto &v : args.float_values_)
-  {
-    AddFloat(RenameArg(object_names, postfix, v.first), v.second.value);
-  }
-  for (const auto &v : args.buffers_)
-  {
-    AddBuffer(RenameArg(object_names, postfix, v.first), v.second);
-  }
-  for (const auto &v : args.images2d_)
-  {
-    AddImage2D(RenameArg(object_names, postfix, v.first), v.second);
-  }
-  for (const auto &v : args.image2d_arrays_)
-  {
-    AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second);
-  }
-  for (const auto &v : args.images3d_)
-  {
-    AddImage3D(RenameArg(object_names, postfix, v.first), v.second);
-  }
-  for (const auto &v : args.image_buffers_)
-  {
-    AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second);
-  }
-  for (const auto &v : args.custom_memories_)
-  {
-    AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second);
-  }
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::TransformToCLCode(const DeviceInfo &device_info,
-                                          const std::map<std::string, std::string> &linkables,
-                                          std::string *code)
-{
-  RETURN_IF_ERROR(AddObjectArgs());
-  RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code));
-  ResolveArgsPass(device_info, code);
-  *code = absl::Substitute(*code, GetListOfArgs());
-  *code = GetDefaultSamplers(device_info) + *code;
-  return absl::OkStatus();
-}
-
-std::string Arguments::GetListOfArgs()
-{
-  std::string result;
-  for (auto &t : buffers_)
-  {
-    const std::string type_name = t.second.data_type == DataType::FLOAT32 ? "float" : "half";
-    std::string attributes;
-    for (const auto &attr : t.second.attributes)
-    {
-      attributes += absl::StrCat("  __attribute__((", attr, "))");
-    }
-    AppendArgument(absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ",
-                                ToCLDataType(t.second.data_type, t.second.element_size), "* ",
-                                t.first, attributes),
-                   &result);
-  }
-  for (auto &t : image_buffers_)
-  {
-    AppendArgument(
-      absl::StrCat(GetImageModifier(t.second.access_type), " image1d_buffer_t ", t.first), &result);
-  }
-  for (auto &t : images2d_)
-  {
-    AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image2d_t ", t.first),
-                   &result);
-  }
-  for (auto &t : image2d_arrays_)
-  {
-    AppendArgument(
-      absl::StrCat(GetImageModifier(t.second.access_type), " image2d_array_t ", t.first), &result);
-  }
-  for (auto &t : images3d_)
-  {
-    AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image3d_t ", t.first),
-                   &result);
-  }
-  for (auto &t : custom_memories_)
-  {
-    AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result);
-  }
-  for (uint32_t i = 0; i < shared_int4s_data_.size() / 4; ++i)
-  {
-    AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
-  }
-  for (uint32_t i = 0; i < shared_float4s_data_.size() / 4; ++i)
-  {
-    AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
-  }
-  return result;
-}
-
-absl::Status Arguments::Bind(cl_kernel kernel, int offset)
-{
-  for (auto &t : buffers_)
-  {
-    const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (auto &t : image_buffers_)
-  {
-    const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (auto &t : images2d_)
-  {
-    const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (auto &t : image2d_arrays_)
-  {
-    const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (auto &t : images3d_)
-  {
-    const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (auto &t : custom_memories_)
-  {
-    const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (size_t i = 0; i < shared_int4s_data_.size() / 4; ++i)
-  {
-    const int error_code =
-      clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_int4s_data_[i * 4]);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  for (size_t i = 0; i < shared_float4s_data_.size() / 4; ++i)
-  {
-    const int error_code =
-      clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_float4s_data_[i * 4]);
-    if (error_code != CL_SUCCESS)
-    {
-      return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                             CLErrorCodeToString(error_code), "(at index - ",
-                                             offset, ")"));
-    }
-    offset++;
-  }
-  return absl::OkStatus();
-}
-
-std::string Arguments::AddActiveArgument(const std::string &arg_name, bool)
-{
-  {
-    auto it = int_values_.find(arg_name);
-    if (it != int_values_.end())
-    {
-      int int_index;
-      if (it->second.active)
-      {
-        int_index = it->second.offset;
-      }
-      else
-      {
-        it->second.active = true;
-        it->second.offset = shared_int4s_data_.size();
-        int_index = it->second.offset;
-        shared_int4s_data_.push_back(it->second.value);
-      }
-      std::string index = std::to_string(int_index / 4);
-      std::string postfixes[4] = {"x", "y", "z", "w"};
-      return "shared_int4_" + index + "." + postfixes[int_index % 4];
-    }
-  }
-  {
-    auto it = float_values_.find(arg_name);
-    if (it != float_values_.end())
-    {
-      int float_index;
-      if (it->second.active)
-      {
-        float_index = it->second.offset;
-      }
-      else
-      {
-        it->second.active = true;
-        it->second.offset = shared_float4s_data_.size();
-        float_index = it->second.offset;
-        shared_float4s_data_.push_back(it->second.value);
-      }
-      std::string index = std::to_string(float_index / 4);
-      std::string postfixes[4] = {"x", "y", "z", "w"};
-      return "shared_float4_" + index + "." + postfixes[float_index % 4];
-    }
-  }
-  return arg_name;
-}
-
-void Arguments::ResolveArgsPass(const DeviceInfo &device_info, std::string *code)
-{
-  bool use_f32_for_half_arguments = device_info.IsPowerVR();
-  size_t position = 0;
-  size_t next_position = code->find(kArgsPrefix);
-  while (next_position != std::string::npos)
-  {
-    size_t arg_pos = next_position;
-    next_position += strlen(kArgsPrefix);
-    std::string object_name = GetNextWord(*code, next_position);
-    std::string new_name = AddActiveArgument(object_name, use_f32_for_half_arguments);
-    code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name);
-    position = arg_pos + new_name.size();
-    next_position = code->find(kArgsPrefix, position);
-  }
-
-  int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
-  shared_int4s_data_.resize(shared_int4s_aligned_size);
-  int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
-  shared_float4s_data_.resize(shared_float4s_aligned_size);
-}
-
-void Arguments::ResolveObjectNames(const std::string &object_name,
-                                   const std::vector<std::string> &member_names, std::string *code)
-{
-  for (const auto &member_name : member_names)
-  {
-    const std::string new_name = kArgsPrefix + object_name + "_" + member_name;
-    ReplaceAllWords(member_name, new_name, code);
-  }
-}
-
-GPUObjectDescriptor *Arguments::GetObjectDescriptor(const std::string &object_name) const
-{
-  {
-    auto it = object_refs_.find(object_name);
-    if (it != object_refs_.end())
-    {
-      return it->second.descriptor.get();
-    }
-  }
-  {
-    auto it = objects_.find(object_name);
-    if (it != objects_.end())
-    {
-      return it->second.descriptor.get();
-    }
-  }
-  return nullptr;
-}
-
-absl::Status Arguments::ResolveSelector(const std::map<std::string, std::string> &linkables,
-                                        const std::string &object_name, const std::string &selector,
-                                        const std::vector<std::string> &args,
-                                        const std::vector<std::string> &template_args,
-                                        std::string *result)
-{
-  const GPUObjectDescriptor *desc_ptr = GetObjectDescriptor(object_name);
-  if (!desc_ptr)
-  {
-    return absl::NotFoundError(absl::StrCat("No object with name - ", object_name));
-  }
-  auto names = desc_ptr->GetGPUResources().GetNames();
-  const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(desc_ptr);
-  if (tensor_desc && selector == "Write")
-  {
-    auto it = linkables.find(object_name);
-    if (it != linkables.end())
-    {
-      if (desc_ptr->GetAccess() != AccessType::WRITE &&
-          desc_ptr->GetAccess() != AccessType::READ_WRITE)
-      {
-        return absl::FailedPreconditionError(
-          absl::StrCat("Object with name - ", object_name, " should have Write access."));
-      }
-      std::string value_name, x_coord, y_coord, s_coord;
-      RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(args, &value_name, &x_coord,
-                                                                      &y_coord, &s_coord));
-      // x_coord can have batch size property of link_object
-      ResolveObjectNames(object_name, names, &x_coord);
-      *result = it->second;
-      ReplaceAllWords("in_out_value", value_name, result);
-      ReplaceAllWords("X_COORD", x_coord, result);
-      ReplaceAllWords("Y_COORD", y_coord, result);
-      ReplaceAllWords("S_COORD", s_coord, result);
-      RETURN_IF_ERROR(ResolveSelectorsPass({}, result));
-    }
-  }
-  std::string patch;
-  RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, args, template_args, &patch));
-  ResolveObjectNames(object_name, names, &patch);
-  *result += patch;
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::ResolveSelectorsPass(const std::map<std::string, std::string> &linkables,
-                                             std::string *code)
-{
-  std::string result;
-  size_t position = 0;
-  size_t next_position = code->find(kArgsPrefix);
-  while (next_position != std::string::npos)
-  {
-    size_t arg_pos = next_position;
-    next_position += strlen(kArgsPrefix);
-    std::string object_name = GetNextWord(*code, next_position);
-    char next = (*code)[next_position + object_name.size()];
-    if (next == '.')
-    {
-      next_position += object_name.size() + 1;
-      std::string selector_name = GetNextWord(*code, next_position);
-      next_position += selector_name.size();
-      next = (*code)[next_position];
-      std::vector<std::string> template_args;
-      if (next == '<')
-      {
-        size_t close_bracket_pos;
-        RETURN_IF_ERROR(
-          ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &template_args));
-        next_position = close_bracket_pos;
-        next = (*code)[next_position];
-      }
-      if (next != '(')
-      {
-        return absl::NotFoundError(
-          absl::StrCat("Expected ( after ", object_name, ".", selector_name, " call"));
-      }
-      std::vector<std::string> args;
-      size_t close_bracket_pos;
-      RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &args));
-      for (auto &arg : args)
-      {
-        RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg));
-      }
-      std::string patch;
-      RETURN_IF_ERROR(
-        ResolveSelector(linkables, object_name, selector_name, args, template_args, &patch));
-      code->replace(arg_pos, close_bracket_pos - arg_pos, patch);
-      position = arg_pos + patch.size();
-    }
-    else
-    {
-      position = arg_pos + strlen(kArgsPrefix);
-    }
-    next_position = code->find(kArgsPrefix, position);
-  }
-  return absl::OkStatus();
-}
-
-absl::Status Arguments::AllocateObjects(CLContext *context)
-{
-  for (auto &t : objects_)
-  {
-    RETURN_IF_ERROR(t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr));
-  }
-  return absl::OkStatus();
-}
-
-void Arguments::ReleaseCPURepresentation()
-{
-  for (auto &t : objects_)
-  {
-    t.second.descriptor->Release();
-  }
-}
-
-absl::Status Arguments::AddObjectArgs()
-{
-  for (auto &t : objects_)
-  {
-    AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
-    GPUResourcesWithValue resources;
-    RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(), &resources));
-    RETURN_IF_ERROR(SetGPUResources(t.first, resources));
-  }
-  for (auto &t : object_refs_)
-  {
-    AddGPUResources(t.first, t.second.descriptor->GetGPUResources());
-  }
-  return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h b/runtime/onert/backend/gpu_cl/open_cl/Arguments.h
deleted file mode 100644 (file)
index 0c6ce1e..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
-
-#include <map>
-#include <string>
-#include <vector>
-
-#include "ClDevice.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-
-#include "AccessType.h"
-#include "Types.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ArgumentsBinder
-{
-public:
-  virtual absl::Status SetInt(const std::string &name, int value) = 0;
-  virtual absl::Status SetFloat(const std::string &name, float value) = 0;
-  virtual ~ArgumentsBinder() = default;
-};
-
-class Arguments : public ArgumentsBinder
-{
-public:
-  Arguments() = default;
-  void AddFloat(const std::string &name, float value = 0.0f);
-  void AddInt(const std::string &name, int value = 0);
-  void AddObjectRef(const std::string &name, AccessType access_type,
-                    GPUObjectDescriptorPtr &&descriptor_ptr);
-  void AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr);
-
-  absl::Status SetInt(const std::string &name, int value) override;
-  absl::Status SetFloat(const std::string &name, float value) override;
-  absl::Status SetObjectRef(const std::string &name, const GPUObject *object);
-
-  absl::Status Bind(cl_kernel kernel, int offset = 0);
-
-  void RenameArgs(const std::string &postfix, std::string *code) const;
-  absl::Status Merge(Arguments &&args, const std::string &postfix);
-
-  absl::Status AllocateObjects(CLContext *context);
-  void ReleaseCPURepresentation();
-  absl::Status TransformToCLCode(const DeviceInfo &device_info,
-                                 const std::map<std::string, std::string> &linkables,
-                                 std::string *code);
-
-  // Move only
-  Arguments(Arguments &&args);
-  Arguments &operator=(Arguments &&args);
-  Arguments(const Arguments &) = delete;
-  Arguments &operator=(const Arguments &) = delete;
-
-  ~Arguments() override = default;
-
-private:
-  void AddBuffer(const std::string &name, const GPUBufferDescriptor &desc);
-  void AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc);
-  void AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc);
-  void AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc);
-  void AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc);
-  void AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc);
-
-  absl::Status SetImage2D(const std::string &name, cl_mem memory);
-  absl::Status SetBuffer(const std::string &name, cl_mem memory);
-  absl::Status SetImage2DArray(const std::string &name, cl_mem memory);
-  absl::Status SetImage3D(const std::string &name, cl_mem memory);
-  absl::Status SetImageBuffer(const std::string &name, cl_mem memory);
-  absl::Status SetCustomMemory(const std::string &name, cl_mem memory);
-
-  std::string GetListOfArgs();
-
-  std::string AddActiveArgument(const std::string &arg_name, bool use_f32_for_halfs);
-  void AddGPUResources(const std::string &name, const GPUResources &resources);
-
-  absl::Status SetGPUResources(const std::string &name, const GPUResourcesWithValue &resources);
-
-  absl::Status AddObjectArgs();
-
-  void ResolveArgsPass(const DeviceInfo &device_info, std::string *code);
-  absl::Status ResolveSelectorsPass(const std::map<std::string, std::string> &linkables,
-                                    std::string *code);
-
-  absl::Status ResolveSelector(const std::map<std::string, std::string> &linkables,
-                               const std::string &object_name, const std::string &selector,
-                               const std::vector<std::string> &args,
-                               const std::vector<std::string> &template_args, std::string *result);
-
-  void ResolveObjectNames(const std::string &object_name,
-                          const std::vector<std::string> &member_names, std::string *code);
-
-  GPUObjectDescriptor *GetObjectDescriptor(const std::string &object_name) const;
-
-  static constexpr char kArgsPrefix[] = "args.";
-
-  struct IntValue
-  {
-    int value;
-
-    // many uniforms generated automatically and not used
-    // to reduce amount of data transferred we adding this optimization
-    bool active = false;
-
-    // offset to shared uniform storage.
-    uint32_t offset = -1;
-  };
-  std::map<std::string, IntValue> int_values_;
-  std::vector<int32_t> shared_int4s_data_;
-
-  struct FloatValue
-  {
-    float value;
-
-    // many uniforms generated automatically and not used
-    // to reduce amount of data transferred we adding this optimization
-    bool active = false;
-
-    // offset to shared uniform storage.
-    uint32_t offset = -1;
-  };
-  std::map<std::string, FloatValue> float_values_;
-  std::vector<float> shared_float4s_data_;
-
-  std::map<std::string, GPUBufferDescriptor> buffers_;
-  std::map<std::string, GPUImage2DDescriptor> images2d_;
-  std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_;
-  std::map<std::string, GPUImage3DDescriptor> images3d_;
-  std::map<std::string, GPUImageBufferDescriptor> image_buffers_;
-  std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_;
-
-  struct ObjectRefArg
-  {
-    GPUObjectDescriptorPtr descriptor;
-  };
-  std::map<std::string, ObjectRefArg> object_refs_;
-
-  struct ObjectArg
-  {
-    GPUObjectPtr obj_ptr;
-    GPUObjectDescriptorPtr descriptor;
-  };
-  std::map<std::string, ObjectArg> objects_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc b/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc
deleted file mode 100644 (file)
index 64c0719..0000000
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Buffer.h"
-
-#include <string>
-
-#include "ClContext.h"
-#include "DataType.h"
-#include "GpuObject.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const void *data,
-                          CLContext *context, Buffer *result)
-{
-  cl_mem buffer;
-  RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes, gpu_read_only,
-                                 const_cast<void *>(data), &buffer));
-  *result = Buffer(buffer, size_in_bytes);
-
-  return absl::OkStatus();
-}
-
-} // namespace
-
-BufferDescriptor::BufferDescriptor(BufferDescriptor &&desc)
-  : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type),
-    element_size(desc.element_size), memory_type(desc.memory_type),
-    attributes(std::move(desc.attributes)), size(desc.size), data(std::move(desc.data))
-{
-}
-
-BufferDescriptor &BufferDescriptor::operator=(BufferDescriptor &&desc)
-{
-  if (this != &desc)
-  {
-    std::swap(element_type, desc.element_type);
-    std::swap(element_size, desc.element_size);
-    std::swap(memory_type, desc.memory_type);
-    attributes = std::move(desc.attributes);
-    std::swap(size, desc.size);
-    data = std::move(desc.data);
-    GPUObjectDescriptor::operator=(std::move(desc));
-  }
-  return *this;
-}
-
-void BufferDescriptor::Release() { data.clear(); }
-
-GPUResources BufferDescriptor::GetGPUResources() const
-{
-  GPUResources resources;
-  GPUBufferDescriptor desc;
-  desc.data_type = element_type;
-  desc.access_type = access_type_;
-  desc.element_size = element_size;
-  desc.memory_type = memory_type;
-  desc.attributes = attributes;
-  resources.buffers.push_back({"buffer", desc});
-  return resources;
-}
-
-absl::Status BufferDescriptor::PerformSelector(const std::string &selector,
-                                               const std::vector<std::string> &args,
-                                               const std::vector<std::string> &template_args,
-                                               std::string *result) const
-{
-  if (selector == "Read")
-  {
-    return PerformReadSelector(args, result);
-  }
-  else if (selector == "GetPtr")
-  {
-    return PerformGetPtrSelector(args, template_args, result);
-  }
-  else
-  {
-    return absl::NotFoundError(
-      absl::StrCat("BufferDescriptor don't have selector with name - ", selector));
-  }
-}
-
-absl::Status BufferDescriptor::PerformReadSelector(const std::vector<std::string> &args,
-                                                   std::string *result) const
-{
-  if (args.size() != 1)
-  {
-    return absl::NotFoundError(
-      absl::StrCat("BufferDescriptor Read require one argument, but ", args.size(), " was passed"));
-  }
-  *result = absl::StrCat("buffer[", args[0], "]");
-  return absl::OkStatus();
-}
-
-absl::Status BufferDescriptor::PerformGetPtrSelector(const std::vector<std::string> &args,
-                                                     const std::vector<std::string> &template_args,
-                                                     std::string *result) const
-{
-  if (args.size() > 1)
-  {
-    return absl::NotFoundError(absl::StrCat(
-      "BufferDescriptor GetPtr require one or zero arguments, but ", args.size(), " was passed"));
-  }
-  if (template_args.size() > 1)
-  {
-    return absl::NotFoundError(absl::StrCat("BufferDescriptor GetPtr require one or zero teemplate "
-                                            "arguments, but ",
-                                            template_args.size(), " was passed"));
-  }
-  std::string conversion;
-  if (template_args.size() == 1)
-  {
-    const std::string type_name = ToCLDataType(element_type, element_size);
-    if (type_name != template_args[0])
-    {
-      conversion = absl::StrCat("(", MemoryTypeToCLType(memory_type), " ", template_args[0], "*)&");
-    }
-  }
-  if (args.empty())
-  {
-    *result = absl::StrCat(conversion, "buffer");
-  }
-  else if (conversion.empty())
-  {
-    *result = absl::StrCat("(buffer + ", args[0], ")");
-  }
-  else
-  {
-    *result = absl::StrCat(conversion, "buffer[", args[0], "]");
-  }
-  return absl::OkStatus();
-}
-
-absl::Status BufferDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
-  Buffer gpu_buffer;
-  RETURN_IF_ERROR(gpu_buffer.CreateFromBufferDescriptor(*this, context));
-  *result = absl::make_unique<Buffer>(std::move(gpu_buffer));
-  return absl::OkStatus();
-}
-
-Buffer::Buffer(cl_mem buffer, size_t size_in_bytes) : buffer_(buffer), size_(size_in_bytes) {}
-
-Buffer::Buffer(Buffer &&buffer) : buffer_(buffer.buffer_), size_(buffer.size_)
-{
-  buffer.buffer_ = nullptr;
-  buffer.size_ = 0;
-}
-
-Buffer &Buffer::operator=(Buffer &&buffer)
-{
-  if (this != &buffer)
-  {
-    Release();
-    std::swap(size_, buffer.size_);
-    std::swap(buffer_, buffer.buffer_);
-  }
-  return *this;
-}
-
-void Buffer::Release()
-{
-  if (buffer_)
-  {
-    clReleaseMemObject(buffer_);
-    buffer_ = nullptr;
-    size_ = 0;
-  }
-}
-
-absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                     GPUResourcesWithValue *resources) const
-{
-  const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr);
-  if (!buffer_desc)
-  {
-    return absl::InvalidArgumentError("Expected BufferDescriptor on input.");
-  }
-
-  resources->buffers.push_back({"buffer", buffer_});
-  return absl::OkStatus();
-}
-
-absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context)
-{
-  bool read_only = desc.memory_type == MemoryType::CONSTANT;
-  uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
-  size_ = desc.size;
-  return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr, &buffer_);
-}
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result)
-{
-  return CreateBuffer(size_in_bytes, true, nullptr, context, result);
-}
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context,
-                                  Buffer *result)
-{
-  return CreateBuffer(size_in_bytes, true, data, context, result);
-}
-
-absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result)
-{
-  return CreateBuffer(size_in_bytes, false, nullptr, context, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h b/runtime/onert/backend/gpu_cl/open_cl/Buffer.h
deleted file mode 100644 (file)
index 39e97be..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct BufferDescriptor : public GPUObjectDescriptor
-{
-  DataType element_type;
-  int element_size;
-  MemoryType memory_type = MemoryType::GLOBAL;
-  std::vector<std::string> attributes;
-
-  // optional
-  int size = 0;
-  std::vector<uint8_t> data;
-
-  BufferDescriptor() = default;
-  BufferDescriptor(const BufferDescriptor &) = default;
-  BufferDescriptor &operator=(const BufferDescriptor &) = default;
-  BufferDescriptor(BufferDescriptor &&desc);
-  BufferDescriptor &operator=(BufferDescriptor &&desc);
-
-  absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
-                               const std::vector<std::string> &template_args,
-                               std::string *result) const override;
-
-  GPUResources GetGPUResources() const override;
-  absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
-  absl::Status PerformGetPtrSelector(const std::vector<std::string> &args,
-                                     const std::vector<std::string> &template_args,
-                                     std::string *result) const;
-
-  absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
-  void Release() override;
-};
-
-// Buffer represent linear GPU data storage with arbitrary data format.
-// Buffer is moveable but not copyable.
-class Buffer : public GPUObject
-{
-public:
-  Buffer() {} // just for using Buffer as a class members
-  Buffer(cl_mem buffer, size_t size_in_bytes);
-
-  // Move only
-  Buffer(Buffer &&buffer);
-  Buffer &operator=(Buffer &&buffer);
-  Buffer(const Buffer &) = delete;
-  Buffer &operator=(const Buffer &) = delete;
-
-  virtual ~Buffer() { Release(); }
-
-  // for profiling and memory statistics
-  uint64_t GetMemorySizeInBytes() const { return size_; }
-
-  cl_mem GetMemoryPtr() const { return buffer_; }
-
-  // Writes data to a buffer. Data should point to a region that
-  // has exact size in bytes as size_in_bytes(constructor parameter).
-  template <typename T> absl::Status WriteData(CLCommandQueue *queue, const std::vector<T> *data);
-
-  // Reads data from Buffer into CPU memory.
-  template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const;
-
-  absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                               GPUResourcesWithValue *resources) const override;
-
-  absl::Status CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context);
-
-private:
-  void Release();
-
-  cl_mem buffer_ = nullptr;
-  size_t size_ = 0;
-};
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result);
-
-absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context,
-                                  Buffer *result);
-
-absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc
deleted file mode 100644 (file)
index d147b7b..0000000
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClCommandQueue.h"
-
-#include <algorithm>
-#include <map>
-#include <string>
-#include <vector>
-#include <limits>
-
-#include "absl/strings/str_cat.h"
-#include "ClDevice.h"
-#include "ClEvent.h"
-#include "Util.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-using namespace std;
-
-CLCommandQueue::CLCommandQueue(cl_command_queue queue, bool has_ownership)
-  : queue_(queue), has_ownership_(has_ownership)
-{
-}
-
-CLCommandQueue::CLCommandQueue(CLCommandQueue &&queue)
-  : queue_(queue.queue_), has_ownership_(queue.has_ownership_)
-{
-  queue.queue_ = nullptr;
-}
-
-CLCommandQueue &CLCommandQueue::operator=(CLCommandQueue &&queue)
-{
-  if (this != &queue)
-  {
-    Release();
-    std::swap(queue_, queue.queue_);
-    has_ownership_ = queue.has_ownership_;
-  }
-  return *this;
-}
-
-CLCommandQueue::~CLCommandQueue() { Release(); }
-
-void CLCommandQueue::Release()
-{
-  if (has_ownership_ && queue_)
-  {
-    clReleaseCommandQueue(queue_);
-    queue_ = nullptr;
-  }
-}
-
-absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                                      const int3 &work_group_size, CLEvent *event)
-{
-  std::vector<size_t> local(3);
-  std::vector<size_t> global(3);
-  for (int i = 0; i < 3; ++i)
-  {
-    local[i] = work_group_size[i];
-    global[i] = work_groups_count[i] * work_group_size[i];
-  }
-  cl_event resulting_event;
-  const int error_code =
-    clEnqueueNDRangeKernel(queue_, kernel.kernel(), 3, nullptr, global.data(), local.data(), 0,
-                           nullptr, event ? &resulting_event : nullptr);
-  if (event)
-  {
-    *event = CLEvent(resulting_event);
-  }
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to clEnqueueNDRangeKernel - ", CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                                      const int3 &work_group_size)
-{
-  return Dispatch(kernel, work_groups_count, work_group_size, nullptr);
-}
-
-absl::Status CLCommandQueue::EnqueueEvent(CLEvent *event)
-{
-  cl_event resulting_event;
-  const int error_code = clEnqueueMarker(queue_, &resulting_event);
-  *event = CLEvent(resulting_event);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to clEnqueueMarker - ", CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region, const void *data)
-{
-  const size_t origin[] = {0, 0, 0};
-  const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y),
-                      static_cast<size_t>(region.z)};
-  auto error_code =
-    clEnqueueWriteImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteImage) - ",
-                                           CLErrorCodeToString(error_code)));
-  }
-
-  return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region, void *data)
-{
-  const size_t origin[] = {0, 0, 0};
-  const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y),
-                      static_cast<size_t>(region.z)};
-  auto error_code =
-    clEnqueueReadImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadImage) - ",
-                                           CLErrorCodeToString(error_code)));
-  }
-
-  return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes,
-                                                const void *data)
-{
-  auto error_code =
-    clEnqueueWriteBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data)
-{
-  auto error_code =
-    clEnqueueReadBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CLCommandQueue::WaitForCompletion()
-{
-  auto error_code = clFinish(queue_);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to clFinish - ", CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-ProfilingCommandQueue::ProfilingCommandQueue(cl_command_queue queue) : CLCommandQueue(queue, true)
-{
-  events_.reserve(128);
-}
-
-ProfilingCommandQueue::ProfilingCommandQueue(ProfilingCommandQueue &&queue)
-  : CLCommandQueue(std::move(queue)), events_(std::move(queue.events_)),
-    current_label_(std::move(queue.current_label_))
-{
-}
-
-ProfilingCommandQueue &ProfilingCommandQueue::operator=(ProfilingCommandQueue &&queue)
-{
-  if (this != &queue)
-  {
-    events_ = std::move(queue.events_);
-    current_label_ = std::move(queue.current_label_);
-    CLCommandQueue::operator=(std::move(queue));
-  }
-  return *this;
-}
-
-void ProfilingCommandQueue::SetEventsLabel(const std::string &name) { current_label_ = name; }
-
-void ProfilingCommandQueue::ResetMeasurements() { events_.clear(); }
-
-absl::Status ProfilingCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                                             const int3 &work_group_size)
-{
-  events_.push_back(CLEvent());
-  RETURN_IF_ERROR(CLCommandQueue::Dispatch(kernel, work_groups_count, work_group_size,
-                                           &events_[events_.size() - 1]));
-  events_.back().SetName(current_label_);
-  return absl::OkStatus();
-}
-
-absl::Status
-ProfilingCommandQueue::GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info,
-                                             const std::vector<int3> &work_groups_count,
-                                             const std::vector<int3> &work_group_sizes, int *index)
-{
-  // Some Adreno 3xx can have wrong numbers for some events
-  const bool possible_bug_with_events = device_info.IsAdreno3xx();
-  events_.resize(work_group_sizes.size());
-  for (size_t i = 0; i < work_group_sizes.size(); ++i)
-  {
-    RETURN_IF_ERROR(
-      CLCommandQueue::Dispatch(kernel, work_groups_count[i], work_group_sizes[i], &events_[i]));
-
-    // reducing the speed of memory leak on Mali for some kernels
-    if (device_info.IsMali() && i % 8 == 7)
-    {
-      events_[i - 7].Wait();
-    }
-    if (possible_bug_with_events)
-    {
-      // We are trying to increase probability for correct result.
-      RETURN_IF_ERROR(WaitForCompletion());
-    }
-  }
-
-  RETURN_IF_ERROR(WaitForCompletion());
-
-  // To release memory of some kernel pool on Mali.
-  if (device_info.IsMali())
-  {
-    RETURN_IF_ERROR(kernel.ReInit());
-  }
-
-  int minimum_index = 0;
-  double minimum_time = std::numeric_limits<double>::max();
-  if (possible_bug_with_events)
-  { // we will try to cut out suspicious results
-    double average_time = 0.0;
-    int average_samples_count = 0;
-    for (size_t i = 0; i < work_group_sizes.size(); ++i)
-    {
-      if (events_[i].GetEventTimeMs() < 100 * 1000)
-      { // 100 sec
-        average_time += events_[i].GetEventTimeMs();
-        average_samples_count++;
-      }
-    }
-    if (average_samples_count == 0)
-    {
-      throw std::runtime_error("It cannot be divided by zero");
-    }
-    else
-    {
-      average_time /= average_samples_count;
-    }
-
-    for (size_t i = 0; i < work_group_sizes.size(); ++i)
-    {
-      double time = events_[i].GetEventTimeMs();
-      if (time < minimum_time && time >= 0.1 * average_time)
-      {
-        minimum_index = i;
-        minimum_time = time;
-      }
-    }
-  }
-  else
-  {
-    for (size_t i = 0; i < work_group_sizes.size(); ++i)
-    {
-      double time = events_[i].GetEventTimeMs();
-      if (time < minimum_time)
-      {
-        minimum_index = i;
-        minimum_time = time;
-      }
-    }
-  }
-
-  *index = minimum_index;
-
-  return absl::OkStatus();
-}
-
-absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context,
-                                  CLCommandQueue *result)
-{
-  int error_code;
-  cl_command_queue queue = clCreateCommandQueue(context.context(), device.id(), 0, &error_code);
-  if (!queue)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code)));
-  }
-  *result = CLCommandQueue(queue, true);
-  return absl::OkStatus();
-}
-
-double ProfilingCommandQueue::GetQueueExecutionTimeMs() const
-{
-  const uint64_t start = events_.front().GetStartedTimeNs();
-  const uint64_t end = events_.back().GetFinishedTimeNs();
-  const uint64_t time_ns = (end - start);
-
-  return static_cast<double>(time_ns) / 1000000.0;
-}
-
-double ProfilingCommandQueue::GetSumOfEventsTimeMs() const
-{
-  double sum = 0.0;
-  for (uint32_t i = 0; i < events_.size(); ++i)
-  {
-    sum += events_[i].GetEventTimeMs();
-  }
-  return sum;
-}
-
-absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context,
-                                         ProfilingCommandQueue *result)
-{
-  int error_code;
-  cl_command_queue queue =
-    clCreateCommandQueue(context.context(), device.id(), CL_QUEUE_PROFILING_ENABLE, &error_code);
-  if (!queue)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code)));
-  }
-
-  *result = ProfilingCommandQueue(queue);
-  return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h
deleted file mode 100644 (file)
index 81f93fd..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
-
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "absl/time/time.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClEvent.h"
-#include "ClKernel.h"
-#include "OpenclWrapper.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct ProfilingInfo
-{
-  struct DispatchInfo
-  {
-    std::string label;
-    absl::Duration duration;
-  };
-
-  std::vector<DispatchInfo> dispatches;
-
-  absl::Duration GetTotalTime() const;
-
-  // Returns report (string of lines delimited by \n)
-  // This method uses GPU counters and measure GPU time only.
-  // Report has next structure:
-  // Per kernel timing(K kernels):
-  //   conv2d 3.2ms
-  //   ...
-  // --------------------
-  // Accumulated time per operation type:
-  //   conv2d - 14.5ms
-  //   ....
-  // --------------------
-  // Ideal total time: 23.4ms // Total time for all kernels
-  std::string GetDetailedReport() const;
-};
-
-// A wrapper around opencl command queue
-class CLCommandQueue
-{
-public:
-  CLCommandQueue() {}
-  CLCommandQueue(cl_command_queue queue, bool has_ownership);
-
-  // Move only
-  CLCommandQueue(CLCommandQueue &&queue);
-  CLCommandQueue &operator=(CLCommandQueue &&queue);
-  CLCommandQueue(const CLCommandQueue &) = delete;
-  CLCommandQueue &operator=(const CLCommandQueue &) = delete;
-
-  virtual ~CLCommandQueue();
-
-  cl_command_queue queue() const { return queue_; }
-
-  virtual absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                                const int3 &work_group_size);
-
-  absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                        const int3 &work_group_size, CLEvent *event);
-
-  absl::Status EnqueueEvent(CLEvent *event);
-
-  absl::Status EnqueueWriteImage(cl_mem memory, int3 region, const void *data);
-  absl::Status EnqueueReadImage(cl_mem memory, int3 region, void *data);
-
-  absl::Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, const void *data);
-  absl::Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data);
-
-  absl::Status WaitForCompletion();
-
-protected:
-  void Release();
-
-  cl_command_queue queue_ = nullptr;
-  bool has_ownership_ = false;
-};
-
-class ProfilingCommandQueue : public CLCommandQueue
-{
-public:
-  ProfilingCommandQueue() {}
-  explicit ProfilingCommandQueue(cl_command_queue queue);
-
-  // Move only
-  ProfilingCommandQueue(ProfilingCommandQueue &&queue);
-  ProfilingCommandQueue &operator=(ProfilingCommandQueue &&queue);
-  ProfilingCommandQueue(const ProfilingCommandQueue &) = delete;
-  ProfilingCommandQueue &operator=(const ProfilingCommandQueue &) = delete;
-
-  absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                        const int3 &work_group_size) override;
-
-  // will write index for fastest work_group among work_group_sizes
-  absl::Status GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info,
-                                     const std::vector<int3> &work_groups_count,
-                                     const std::vector<int3> &work_group_sizes, int *index);
-
-  // call ResetMeasurements() to start new seriese of measurements
-  void ResetMeasurements();
-
-  double GetQueueExecutionTimeMs() const;
-
-  // Difference from GetQueueExecutionTimeMs is that this number doesn't include
-  // time between kernels(kernels launches or preparing) on GPU. Usually, this
-  // time should be 5-10% better than GetQueueExecutionTimeMs, because 5-10%
-  // spend on something else(maybe kernels launches or preparing)
-  double GetSumOfEventsTimeMs() const;
-
-  // This label will be used for all subsequent dispatches.
-  void SetEventsLabel(const std::string &name);
-
-private:
-  std::vector<CLEvent> events_;
-  std::string current_label_;
-};
-
-absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context,
-                                  CLCommandQueue *result);
-
-absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context,
-                                         ProfilingCommandQueue *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc b/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc
deleted file mode 100644 (file)
index 3289ff9..0000000
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClContext.h"
-
-#include "absl/strings/str_cat.h"
-#include "ClImageFormat.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context, cl_mem_flags flags)
-{
-  cl_uint num_image_formats;
-  cl_int error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, 0, nullptr,
-                                            &num_image_formats);
-  if (error != CL_SUCCESS)
-  {
-    return {};
-  }
-
-  std::vector<cl_image_format> result(num_image_formats);
-  error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, num_image_formats,
-                                     &result[0], nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return {};
-  }
-  return result;
-}
-
-bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, int num_channels)
-{
-  return image_format.image_channel_data_type == ToImageChannelType(data_type) &&
-         image_format.image_channel_order == ToChannelOrder(num_channels);
-}
-
-void AddSupportedImageFormats(cl_context context, DeviceInfo *info)
-{
-  auto supported_formats = GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE);
-  for (auto format : supported_formats)
-  {
-    info->supports_r_f16_tex2d =
-      info->supports_r_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 1);
-    info->supports_rg_f16_tex2d =
-      info->supports_rg_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 2);
-    info->supports_rgb_f16_tex2d =
-      info->supports_rgb_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 3);
-    info->supports_rgba_f16_tex2d =
-      info->supports_rgba_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 4);
-    info->supports_r_f32_tex2d =
-      info->supports_r_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 1);
-    info->supports_rg_f32_tex2d =
-      info->supports_rg_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 2);
-    info->supports_rgb_f32_tex2d =
-      info->supports_rgb_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 3);
-    info->supports_rgba_f32_tex2d =
-      info->supports_rgba_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 4);
-  }
-}
-
-absl::Status CreateCLContext(const CLDevice &device, cl_context_properties *properties,
-                             CLContext *result)
-{
-  int error_code;
-  cl_device_id device_id = device.id();
-  cl_context context = clCreateContext(properties, 1, &device_id, nullptr, nullptr, &error_code);
-  if (!context)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create a compute context - ", CLErrorCodeToString(error_code)));
-  }
-  AddSupportedImageFormats(context, &device.info_);
-
-  *result = CLContext(context, true);
-  return absl::OkStatus();
-}
-
-} // namespace
-
-CLContext::CLContext(cl_context context, bool has_ownership)
-  : context_(context), has_ownership_(has_ownership)
-{
-}
-
-CLContext::CLContext(CLContext &&context)
-  : context_(context.context_), has_ownership_(context.has_ownership_)
-{
-  context.context_ = nullptr;
-}
-
-CLContext &CLContext::operator=(CLContext &&context)
-{
-  if (this != &context)
-  {
-    Release();
-    std::swap(context_, context.context_);
-    has_ownership_ = context.has_ownership_;
-  }
-  return *this;
-}
-
-CLContext::~CLContext() { Release(); }
-
-void CLContext::Release()
-{
-  if (has_ownership_ && context_)
-  {
-    clReleaseContext(context_);
-    context_ = nullptr;
-  }
-}
-
-bool CLContext::IsFloatTexture2DSupported(int num_channels, DataType data_type,
-                                          cl_mem_flags flags) const
-{
-  auto supported_formats = GetSupportedImage2DFormats(context_, flags);
-  for (auto format : supported_formats)
-  {
-    if (format.image_channel_data_type == ToImageChannelType(data_type) &&
-        format.image_channel_order == ToChannelOrder(num_channels))
-    {
-      return true;
-    }
-  }
-
-  return false;
-}
-
-absl::Status CreateCLContext(const CLDevice &device, CLContext *result)
-{
-  return CreateCLContext(device, nullptr, result);
-}
-
-absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context,
-                               cl_context_properties egl_display, CLContext *result)
-{
-  if (!device.SupportsExtension("cl_khr_gl_sharing"))
-  {
-    return absl::UnavailableError("Device doesn't support CL-GL sharing.");
-  }
-  cl_context_properties platform = reinterpret_cast<cl_context_properties>(device.platform());
-  cl_context_properties props[] = {CL_GL_CONTEXT_KHR,
-                                   egl_context,
-                                   CL_EGL_DISPLAY_KHR,
-                                   egl_display,
-                                   CL_CONTEXT_PLATFORM,
-                                   platform,
-                                   0};
-  return CreateCLContext(device, props, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h b/runtime/onert/backend/gpu_cl/open_cl/ClContext.h
deleted file mode 100644 (file)
index cf1d0d2..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__
-
-#include "ClDevice.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// A RAII wrapper around opencl context
-class CLContext
-{
-public:
-  CLContext() {}
-  CLContext(cl_context context, bool has_ownership);
-
-  // Move only
-  CLContext(CLContext &&context);
-  CLContext &operator=(CLContext &&context);
-  CLContext(const CLContext &) = delete;
-  CLContext &operator=(const CLContext &) = delete;
-
-  ~CLContext();
-
-  cl_context context() const { return context_; }
-
-  bool IsFloatTexture2DSupported(int num_channels, DataType data_type,
-                                 cl_mem_flags flags = CL_MEM_READ_WRITE) const;
-
-private:
-  void Release();
-
-  cl_context context_ = nullptr;
-  bool has_ownership_ = false;
-};
-
-absl::Status CreateCLContext(const CLDevice &device, CLContext *result);
-absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context,
-                               cl_context_properties egl_display, CLContext *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc
deleted file mode 100644 (file)
index 8dede13..0000000
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClDevice.h"
-
-#include <algorithm>
-#include <string>
-#include <vector>
-
-#include "Util.h"
-#include "Status.h"
-
-#include "absl/strings/numbers.h"
-#include "absl/strings/str_split.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <> std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info)
-{
-  size_t size;
-  cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
-  if (error != CL_SUCCESS)
-  {
-    return "";
-  }
-
-  std::string result(size - 1, 0);
-  error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return "";
-  }
-  return result;
-}
-
-namespace
-{
-template <typename T> T GetPlatformInfo(cl_platform_id id, cl_platform_info info)
-{
-  T result;
-  cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return -1;
-  }
-  return result;
-}
-
-std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info)
-{
-  size_t size;
-  cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
-  if (error != CL_SUCCESS)
-  {
-    return "";
-  }
-
-  std::string result(size - 1, 0);
-  error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return "";
-  }
-  return result;
-}
-
-void GetDeviceWorkDimsSizes(cl_device_id id, int3 *result)
-{
-  int dims_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
-  if (dims_count < 3)
-  {
-    return;
-  }
-  std::vector<size_t> limits(dims_count);
-  cl_int error = clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * dims_count,
-                                 limits.data(), nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return;
-  }
-  // dims_count must be at least 3 according to spec
-  result->x = limits[0];
-  result->y = limits[1];
-  result->z = limits[2];
-}
-
-OpenCLVersion ParseCLVersion(const std::string &version)
-{
-  const auto first_dot_pos = version.find_first_of('.');
-  if (first_dot_pos == std::string::npos)
-  {
-    return OpenCLVersion::CL_1_0;
-  }
-  const int major = version[first_dot_pos - 1] - '0';
-  const int minor = version[first_dot_pos + 1] - '0';
-
-  if (major == 1)
-  {
-    if (minor == 2)
-    {
-      return OpenCLVersion::CL_1_2;
-    }
-    else if (minor == 1)
-    {
-      return OpenCLVersion::CL_1_1;
-    }
-    else
-    {
-      return OpenCLVersion::CL_1_0;
-    }
-  }
-  else if (major == 2)
-  {
-    if (minor == 2)
-    {
-      return OpenCLVersion::CL_2_2;
-    }
-    else if (minor == 1)
-    {
-      return OpenCLVersion::CL_2_1;
-    }
-    else
-    {
-      return OpenCLVersion::CL_2_0;
-    }
-  }
-  else if (major == 3)
-  {
-    return OpenCLVersion::CL_3_0;
-  }
-  else
-  {
-    return OpenCLVersion::CL_1_0;
-  }
-}
-
-Vendor ParseVendor(const std::string &device_name, const std::string &vendor_name)
-{
-  std::string d_name = device_name;
-  std::string v_name = vendor_name;
-  std::transform(d_name.begin(), d_name.end(), d_name.begin(), ::tolower);
-  std::transform(v_name.begin(), v_name.end(), v_name.begin(), ::tolower);
-  if (d_name.find("qualcomm") != std::string::npos || v_name.find("qualcomm") != std::string::npos)
-  {
-    return Vendor::kQualcomm;
-  }
-  else if (d_name.find("mali") != std::string::npos || v_name.find("mali") != std::string::npos)
-  {
-    return Vendor::kMali;
-  }
-  else if (d_name.find("power") != std::string::npos || v_name.find("power") != std::string::npos)
-  {
-    return Vendor::kPowerVR;
-  }
-  else if (d_name.find("nvidia") != std::string::npos || v_name.find("nvidia") != std::string::npos)
-  {
-    return Vendor::kNvidia;
-  }
-  else if (d_name.find("advanced micro devices") != std::string::npos ||
-           v_name.find("advanced micro devices") != std::string::npos)
-  {
-    return Vendor::kAMD;
-  }
-  else if (d_name.find("intel") != std::string::npos || v_name.find("intel") != std::string::npos)
-  {
-    return Vendor::kIntel;
-  }
-  else
-  {
-    return Vendor::kUnknown;
-  }
-}
-
-// check that gpu_version belong to range min_version-max_version
-// min_version is included and max_version is excluded.
-bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version)
-{
-  return gpu_version >= min_version && gpu_version < max_version;
-}
-} // namespace
-
-DeviceInfo DeviceInfoFromDeviceID(cl_device_id id)
-{
-  DeviceInfo info;
-  const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
-  const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
-  const auto opencl_c_version = GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
-  info.vendor = ParseVendor(device_name, vendor_name);
-  if (info.vendor == Vendor::kQualcomm)
-  {
-    info.adreno_info = AdrenoInfo(opencl_c_version);
-  }
-  else if (info.vendor == Vendor::kMali)
-  {
-    info.mali_info = MaliInfo(device_name);
-  }
-  info.cl_version = ParseCLVersion(opencl_c_version);
-  info.extensions = absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
-
-  info.supports_fp16 = false;
-  info.supports_image3d_writes = false;
-  for (const auto &ext : info.extensions)
-  {
-    if (ext == "cl_khr_fp16")
-    {
-      info.supports_fp16 = true;
-    }
-    if (ext == "cl_khr_3d_image_writes")
-    {
-      info.supports_image3d_writes = true;
-    }
-  }
-
-  cl_device_fp_config f32_config =
-    GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
-  info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
-
-  if (info.supports_fp16)
-  {
-    cl_device_fp_config f16_config;
-    auto status = GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
-    // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
-    if (status.ok() && info.vendor != Vendor::kAMD)
-    {
-      info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
-    }
-    else
-    { // happens on PowerVR
-      f16_config = f32_config;
-      info.supports_fp16_rtn = info.supports_fp32_rtn;
-    }
-  }
-  else
-  {
-    info.supports_fp16_rtn = false;
-  }
-
-  if (info.vendor == Vendor::kPowerVR && !info.supports_fp16)
-  {
-    // PowerVR doesn't have full support of fp16 and so doesn't list this
-    // extension. But it can support fp16 in MADs and as buffers/textures types,
-    // so we will use it.
-    info.supports_fp16 = true;
-    info.supports_fp16_rtn = info.supports_fp32_rtn;
-  }
-
-  if (!info.supports_image3d_writes &&
-      ((info.vendor == Vendor::kQualcomm &&
-        IsGPUVersionInRange(info.adreno_info.gpu_version, 400, 500)) ||
-       info.vendor == Vendor::kNvidia))
-  {
-    // in local tests Adreno 430 can write in image 3d, at least on small sizes,
-    // but it doesn't have cl_khr_3d_image_writes in list of available
-    // extensions
-    // The same for NVidia
-    info.supports_image3d_writes = true;
-  }
-  info.compute_units_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
-  info.image2d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
-  info.image2d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
-  info.buffer_max_size = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
-  if (info.cl_version >= OpenCLVersion::CL_1_2)
-  {
-    info.image_buffer_max_size = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
-    info.image_array_max_layers = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
-  }
-  info.image3d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
-  info.image3d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
-  info.image3d_max_depth = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
-  int3 max_work_group_sizes;
-  GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
-  info.max_work_group_size_x = max_work_group_sizes.x;
-  info.max_work_group_size_y = max_work_group_sizes.y;
-  info.max_work_group_size_z = max_work_group_sizes.z;
-
-  if (info.IsIntel())
-  {
-    if (info.SupportsExtension("cl_intel_required_subgroup_size"))
-    {
-      size_t sub_groups_count;
-      cl_int status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0, nullptr,
-                                      &sub_groups_count);
-      if (status == CL_SUCCESS)
-      {
-        std::vector<size_t> sub_group_sizes(sub_groups_count);
-        status =
-          clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
-                          sizeof(size_t) * sub_groups_count, sub_group_sizes.data(), nullptr);
-        if (status == CL_SUCCESS)
-        {
-          for (size_t i = 0; i < sub_groups_count; ++i)
-          {
-            info.supported_subgroup_sizes.push_back(sub_group_sizes[i]);
-          }
-        }
-      }
-    }
-  }
-  return info;
-}
-
-CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
-  : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id)
-{
-}
-
-CLDevice::CLDevice(const CLDevice &device)
-  : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_)
-{
-}
-
-CLDevice &CLDevice::operator=(const CLDevice &device)
-{
-  if (this != &device)
-  {
-    info_ = device.info_;
-    id_ = device.id_;
-    platform_id_ = device.platform_id_;
-  }
-  return *this;
-}
-
-CLDevice::CLDevice(CLDevice &&device)
-  : info_(std::move(device.info_)), id_(device.id_), platform_id_(device.platform_id_)
-{
-  device.id_ = nullptr;
-  device.platform_id_ = nullptr;
-}
-
-CLDevice &CLDevice::operator=(CLDevice &&device)
-{
-  if (this != &device)
-  {
-    id_ = nullptr;
-    platform_id_ = nullptr;
-    info_ = std::move(device.info_);
-    std::swap(id_, device.id_);
-    std::swap(platform_id_, device.platform_id_);
-  }
-  return *this;
-}
-
-bool CLDevice::SupportsFP16() const { return info_.supports_fp16; }
-
-bool CLDevice::SupportsExtension(const std::string &extension) const
-{
-  return info_.SupportsExtension(extension);
-}
-
-bool CLDevice::SupportsTextureArray() const { return info_.SupportsTextureArray(); }
-
-bool CLDevice::SupportsImageBuffer() const { return info_.SupportsImageBuffer(); }
-
-bool CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); }
-
-bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; }
-
-bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; }
-
-std::string CLDevice::GetPlatformVersion() const
-{
-  return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
-}
-
-bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); }
-
-bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const
-{
-  return info_.SupportsSubGroupWithSize(sub_group_size);
-}
-
-bool CLDevice::IsAdreno() const { return info_.IsAdreno(); }
-
-bool CLDevice::IsAdreno3xx() const { return info_.IsAdreno3xx(); }
-
-bool CLDevice::IsAdreno4xx() const { return info_.IsAdreno4xx(); }
-
-bool CLDevice::IsAdreno5xx() const { return info_.IsAdreno5xx(); }
-
-bool CLDevice::IsAdreno6xx() const { return info_.IsAdreno6xx(); }
-
-bool CLDevice::IsAdreno6xxOrHigher() const { return info_.IsAdreno6xxOrHigher(); }
-
-bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); }
-
-bool CLDevice::IsNvidia() const { return info_.IsNvidia(); }
-
-bool CLDevice::IsMali() const { return info_.IsMali(); }
-
-bool CLDevice::IsAMD() const { return info_.IsAMD(); }
-
-bool CLDevice::IsIntel() const { return info_.IsIntel(); }
-
-bool CLDevice::SupportsOneLayerTextureArray() const { return info_.SupportsOneLayerTextureArray(); }
-
-void CLDevice::DisableOneLayerTextureArray()
-{
-  info_.adreno_info.support_one_layer_texture_array = false;
-}
-
-absl::Status CreateDefaultGPUDevice(CLDevice *result)
-{
-  cl_uint num_platforms;
-  clGetPlatformIDs(0, nullptr, &num_platforms);
-  if (num_platforms == 0)
-  {
-    return absl::UnknownError("No supported OpenCL platform.");
-  }
-  std::vector<cl_platform_id> platforms(num_platforms);
-  clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
-
-  cl_platform_id platform_id = platforms[0];
-  cl_uint num_devices;
-  clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
-  if (num_devices == 0)
-  {
-    return absl::UnknownError("No GPU on current platform.");
-  }
-
-  std::vector<cl_device_id> devices(num_devices);
-  clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices, devices.data(), nullptr);
-
-  *result = CLDevice(devices[0], platform_id);
-  return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h
deleted file mode 100644 (file)
index 6e740fe..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__
-
-#include <string>
-#include <vector>
-
-#include "DeviceInfo.h"
-#include "OpenclWrapper.h"
-#include "Util.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// A wrapper around opencl device id
-class CLDevice
-{
-public:
-  CLDevice() = default;
-  CLDevice(cl_device_id id, cl_platform_id platform_id);
-
-  CLDevice(CLDevice &&device);
-  CLDevice &operator=(CLDevice &&device);
-  CLDevice(const CLDevice &);
-  CLDevice &operator=(const CLDevice &);
-
-  ~CLDevice() {}
-
-  cl_device_id id() const { return id_; }
-  cl_platform_id platform() const { return platform_id_; }
-  std::string GetPlatformVersion() const;
-
-  Vendor vendor() const { return info_.vendor; }
-  OpenCLVersion cl_version() const { return info_.cl_version; }
-  bool SupportsFP16() const;
-  bool SupportsTextureArray() const;
-  bool SupportsImageBuffer() const;
-  bool SupportsImage3D() const;
-  bool SupportsExtension(const std::string &extension) const;
-  bool SupportsFP32RTN() const;
-  bool SupportsFP16RTN() const;
-  bool IsCL20OrHigher() const;
-  bool SupportsSubGroupWithSize(int sub_group_size) const;
-  bool IsAdreno() const;
-  bool IsAdreno3xx() const;
-  bool IsAdreno4xx() const;
-  bool IsAdreno5xx() const;
-  bool IsAdreno6xx() const;
-  bool IsAdreno6xxOrHigher() const;
-  bool IsPowerVR() const;
-  bool IsNvidia() const;
-  bool IsMali() const;
-  bool IsAMD() const;
-  bool IsIntel() const;
-
-  // To track bug on some Adreno. b/131099086
-  bool SupportsOneLayerTextureArray() const;
-  void DisableOneLayerTextureArray();
-
-  const DeviceInfo &GetInfo() const { return info_; }
-  // We update device info during context creation, so as supported texture
-  // formats can be requested from context only.
-  mutable DeviceInfo info_;
-
-private:
-  cl_device_id id_ = nullptr;
-  cl_platform_id platform_id_ = nullptr;
-};
-
-absl::Status CreateDefaultGPUDevice(CLDevice *result);
-
-template <typename T> T GetDeviceInfo(cl_device_id id, cl_device_info info)
-{
-  T result;
-  cl_int error = clGetDeviceInfo(id, info, sizeof(T), &result, nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return -1;
-  }
-  return result;
-}
-
-template <typename T> absl::Status GetDeviceInfo(cl_device_id id, cl_device_info info, T *result)
-{
-  cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr);
-  if (error != CL_SUCCESS)
-  {
-    return absl::InvalidArgumentError(CLErrorCodeToString(error));
-  }
-  return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h b/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h
deleted file mode 100644 (file)
index 48cd2fb..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__
-
-#include <string>
-
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// @return if error_code is success, then return OK status. Otherwise translates
-// error code into a message.
-inline absl::Status GetOpenCLError(cl_int error_code)
-{
-  if (error_code == CL_SUCCESS)
-  {
-    return absl::OkStatus();
-  }
-  return absl::InternalError("OpenCL error: " + CLErrorCodeToString(error_code));
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc
deleted file mode 100644 (file)
index beb64a9..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClEvent.h"
-
-#include "OpenclWrapper.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-CLEvent::CLEvent(cl_event event) : event_(event) {}
-
-CLEvent::CLEvent(CLEvent &&event) : event_(event.event_), name_(std::move(event.name_))
-{
-  event.event_ = nullptr;
-}
-
-CLEvent &CLEvent::operator=(CLEvent &&event)
-{
-  if (this != &event)
-  {
-    Release();
-    std::swap(event_, event.event_);
-    name_ = std::move(event.name_);
-  }
-  return *this;
-}
-
-uint64_t CLEvent::GetStartedTimeNs() const
-{
-  cl_ulong time_ns;
-  clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &time_ns, nullptr);
-  return time_ns;
-}
-
-uint64_t CLEvent::GetFinishedTimeNs() const
-{
-  cl_ulong time_ns;
-  clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &time_ns, nullptr);
-  return time_ns;
-}
-
-double CLEvent::GetEventTimeMs() const
-{
-  const uint64_t start = GetStartedTimeNs();
-  const uint64_t end = GetFinishedTimeNs();
-  const uint64_t time_ns = (end - start);
-
-  return static_cast<double>(time_ns) * 1e-6;
-}
-
-uint64_t CLEvent::GetEventTimeNs() const { return GetFinishedTimeNs() - GetStartedTimeNs(); }
-
-void CLEvent::SetName(const std::string &name) { name_ = name; }
-
-void CLEvent::Wait() const { clWaitForEvents(1, &event_); }
-
-CLEvent::~CLEvent() { Release(); }
-
-void CLEvent::Release()
-{
-  if (event_)
-  {
-    clReleaseEvent(event_);
-    event_ = nullptr;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h
deleted file mode 100644 (file)
index 265409f..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__
-
-#include <cstdint>
-#include <string>
-
-#include "OpenclWrapper.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// A RAII wrapper around opencl event
-class CLEvent
-{
-public:
-  CLEvent() {}
-  explicit CLEvent(cl_event event);
-
-  // Move only
-  CLEvent(CLEvent &&event);
-  CLEvent &operator=(CLEvent &&event);
-  CLEvent(const CLEvent &) = delete;
-  CLEvent &operator=(const CLEvent &) = delete;
-
-  ~CLEvent();
-
-  uint64_t GetStartedTimeNs() const;
-  uint64_t GetFinishedTimeNs() const;
-
-  double GetEventTimeMs() const;
-  uint64_t GetEventTimeNs() const;
-
-  void Wait() const;
-
-  cl_event event() const { return event_; }
-
-  bool is_valid() const { return event_ != nullptr; }
-
-  void SetName(const std::string &name);
-  std::string GetName() const { return name_; }
-
-private:
-  void Release();
-
-  cl_event event_ = nullptr;
-
-  std::string name_; // optional, for profiling mostly
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc
deleted file mode 100644 (file)
index 247a63d..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClImageFormat.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-cl_channel_order ToChannelOrder(int num_channels)
-{
-  switch (num_channels)
-  {
-    case 1:
-      return CL_R;
-    case 2:
-      return CL_RG;
-    case 3:
-      return CL_RGB;
-    case 4:
-      return CL_RGBA;
-    default:
-      return -1;
-  }
-}
-
-cl_channel_type ToImageChannelType(DataType data_type)
-{
-  switch (data_type)
-  {
-    case DataType::FLOAT32:
-      return CL_FLOAT;
-    case DataType::FLOAT16:
-      return CL_HALF_FLOAT;
-    default:
-      return -1;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h
deleted file mode 100644 (file)
index a763746..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__
-
-#include "OpenclWrapper.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-cl_channel_order ToChannelOrder(int num_channels);
-
-cl_channel_type ToImageChannelType(DataType data_type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc
deleted file mode 100644 (file)
index f7745b9..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClKernel.h"
-
-#include "absl/strings/str_cat.h"
-#include "ClProgram.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-absl::Status GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id, int *result)
-{
-  size_t max_work_group_size;
-  cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE,
-                                               sizeof(size_t), &max_work_group_size, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_WORK_GROUP_SIZE ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  *result = static_cast<int>(max_work_group_size);
-  return absl::OkStatus();
-}
-
-absl::Status GetKernelPrivateMemorySize(cl_kernel kernel, cl_device_id device_id, int *result)
-{
-  cl_ulong private_mem_size;
-  cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PRIVATE_MEM_SIZE,
-                                               sizeof(cl_ulong), &private_mem_size, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_PRIVATE_MEM_SIZE ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  *result = static_cast<int>(private_mem_size);
-  return absl::OkStatus();
-}
-
-} // namespace
-
-CLKernel::CLKernel(CLKernel &&kernel)
-  : info_(kernel.info_), binding_counter_(kernel.binding_counter_),
-    function_name_(std::move(kernel.function_name_)), program_(kernel.program_),
-    kernel_(kernel.kernel_)
-{
-  kernel.kernel_ = nullptr;
-}
-
-CLKernel &CLKernel::operator=(CLKernel &&kernel)
-{
-  if (this != &kernel)
-  {
-    Release();
-    std::swap(info_, kernel.info_);
-    std::swap(binding_counter_, kernel.binding_counter_);
-    function_name_ = std::move(kernel.function_name_);
-    std::swap(program_, kernel.program_);
-    std::swap(kernel_, kernel.kernel_);
-  }
-  return *this;
-}
-
-CLKernel::~CLKernel() { Release(); }
-
-absl::Status CLKernel::ReInit() const
-{
-  clReleaseKernel(kernel_);
-  cl_kernel *kern_ptr = const_cast<cl_kernel *>(&kernel_);
-  int error_code;
-  *kern_ptr = clCreateKernel(program_, function_name_.c_str(), &error_code);
-  if (!kernel_ || error_code != CL_SUCCESS)
-  {
-    *kern_ptr = nullptr;
-    return absl::UnknownError(
-      absl::StrCat("Failed to create ", function_name_, CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-void CLKernel::Release()
-{
-  if (kernel_)
-  {
-    clReleaseKernel(kernel_);
-    clReleaseProgram(program_);
-    kernel_ = nullptr;
-  }
-}
-
-absl::Status CLKernel::CreateFromProgram(const CLProgram &program, const std::string &function_name)
-{
-  int error_code;
-  function_name_ = function_name;
-  kernel_ = clCreateKernel(program.program(), function_name.c_str(), &error_code);
-  if (!kernel_ || error_code != CL_SUCCESS)
-  {
-    kernel_ = nullptr;
-    return absl::UnknownError(
-      absl::StrCat("Failed to create ", function_name, CLErrorCodeToString(error_code)));
-  }
-
-  program_ = program.program();
-  clRetainProgram(program_);
-
-  RETURN_IF_ERROR(
-    GetKernelPrivateMemorySize(kernel_, program.GetDeviceId(), &info_.private_memory_size));
-  RETURN_IF_ERROR(
-    GetKernelMaxWorkGroupSize(kernel_, program.GetDeviceId(), &info_.max_work_group_size));
-  return absl::OkStatus();
-}
-
-absl::Status CLKernel::SetMemory(int index, cl_mem memory)
-{
-  return SetBytes(index, &memory, sizeof(cl_mem));
-}
-
-absl::Status CLKernel::SetMemoryAuto(cl_mem memory)
-{
-  return SetBytesAuto(&memory, sizeof(cl_mem));
-}
-
-absl::Status CLKernel::SetBytes(int index, const void *ptr, int length) const
-{
-  const int error_code = clSetKernelArg(kernel_, index, length, ptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to set kernel arguments - ", CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CLKernel::SetBytesAuto(const void *ptr, int length)
-{
-  const int error_code = clSetKernelArg(kernel_, binding_counter_, length, ptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ",
-                                           CLErrorCodeToString(error_code), "(at index - ",
-                                           binding_counter_, ")"));
-  }
-  binding_counter_++;
-  return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h
deleted file mode 100644 (file)
index 9575b79..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__
-
-#include <string>
-
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClProgram.h"
-#include "OpenclWrapper.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct KernelInfo
-{
-  int private_memory_size = 0;
-  int max_work_group_size = 0;
-};
-
-// Arguments binding to CLKernel can be manual or automatic
-// In manual you specify binding index explicitly
-// In automatic binding, index auto-incremented with every binding call
-// Also, if you use automatic mode you must call ResetBindingCounter
-//   before parameters binding
-class CLKernel
-{
-public:
-  CLKernel() {}
-
-  // Move only
-  CLKernel(CLKernel &&kernel);
-  CLKernel &operator=(CLKernel &&kernel);
-  CLKernel(const CLKernel &) = delete;
-  CLKernel &operator=(const CLKernel &) = delete;
-
-  ~CLKernel();
-
-  cl_kernel kernel() const { return kernel_; }
-
-  absl::Status CreateFromProgram(const CLProgram &program, const std::string &function_name);
-
-  absl::Status SetMemory(int index, cl_mem memory);
-  absl::Status SetMemoryAuto(cl_mem memory);
-  template <typename T> absl::Status SetBytes(int index, const T &value) const
-  {
-    return SetBytes(index, static_cast<const void *>(&value), sizeof(T));
-  }
-  template <typename T> absl::Status SetBytesAuto(const T &value)
-  {
-    return SetBytesAuto(static_cast<const void *>(&value), sizeof(T));
-  }
-
-  int GetBindingCounter() const { return binding_counter_; }
-  void ResetBindingCounter() { binding_counter_ = 0; }
-
-  // Do not use this function
-  // workaround for Mali memory leak
-  absl::Status ReInit() const;
-
-  KernelInfo info_;
-
-private:
-  void Release();
-  absl::Status SetBytes(int index, const void *ptr, int length) const;
-  absl::Status SetBytesAuto(const void *ptr, int length);
-
-  int binding_counter_ = -1;
-
-  std::string function_name_ = "";
-  // reference to program from which kernel was created
-  cl_program program_ = nullptr;
-  cl_kernel kernel_ = nullptr;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc
deleted file mode 100644 (file)
index fd3bc55..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClMemory.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-cl_mem_flags ToClMemFlags(AccessType access_type)
-{
-  switch (access_type)
-  {
-    case AccessType::READ:
-      return CL_MEM_READ_ONLY;
-    case AccessType::WRITE:
-      return CL_MEM_WRITE_ONLY;
-    case AccessType::READ_WRITE:
-      return CL_MEM_READ_WRITE;
-    default:
-      throw std::runtime_error("Invalid AccessType");
-  }
-
-  return CL_MEM_READ_ONLY; // unreachable
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h
deleted file mode 100644 (file)
index c704ec7..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
-
-#include <algorithm>
-
-#include "OpenclWrapper.h"
-#include "AccessType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// RAII wrapper for OpenCL memory object.
-//
-// Image is moveable but not copyable.
-class CLMemory
-{
-public:
-  // Creates invalid object.
-  CLMemory() : CLMemory(nullptr, false) {}
-
-  CLMemory(cl_mem memory, bool has_ownership) : memory_(memory), has_ownership_(has_ownership) {}
-
-  // Move-only
-  CLMemory(const CLMemory &) = delete;
-  CLMemory &operator=(const CLMemory &) = delete;
-  CLMemory(CLMemory &&image) : memory_(image.memory_), has_ownership_(image.has_ownership_)
-  {
-    image.memory_ = nullptr;
-  }
-
-  ~CLMemory() { Invalidate(); }
-
-  CLMemory &operator=(CLMemory &&image)
-  {
-    if (this != &image)
-    {
-      Invalidate();
-      std::swap(memory_, image.memory_);
-      has_ownership_ = image.has_ownership_;
-    }
-    return *this;
-  }
-
-  cl_mem memory() const { return memory_; }
-
-  bool is_valid() const { return memory_ != nullptr; }
-
-  // @return true if this object actually owns corresponding CL memory
-  //         and manages it's lifetime.
-  bool has_ownership() const { return has_ownership_; }
-
-  cl_mem Release()
-  {
-    cl_mem to_return = memory_;
-    memory_ = nullptr;
-    return to_return;
-  }
-
-private:
-  void Invalidate()
-  {
-    if (memory_ && has_ownership_)
-    {
-      clReleaseMemObject(memory_);
-    }
-    memory_ = nullptr;
-  }
-
-  cl_mem memory_ = nullptr;
-  bool has_ownership_ = false;
-};
-
-cl_mem_flags ToClMemFlags(AccessType access_type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc
deleted file mode 100644 (file)
index c72b01a..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ClProgram.h"
-
-#include <cstdint>
-#include <cstring>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetProgramBuildInfo(cl_program program, cl_device_id id, cl_program_build_info info)
-{
-  size_t size;
-  cl_int error_code = clGetProgramBuildInfo(program, id, info, 0, nullptr, &size);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code));
-  }
-
-  std::string result(size - 1, 0);
-  error_code = clGetProgramBuildInfo(program, id, info, size, &result[0], nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code));
-  }
-  return result;
-}
-
-absl::Status GetBinarySize(cl_program program, size_t *binary_size)
-{
-  cl_int error_code =
-    clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), binary_size, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to get program binary size - ", CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status BuildProgram(cl_program program, const CLDevice &device,
-                          const std::string &compiler_options)
-{
-  const int error_code =
-    clBuildProgram(program, 0, nullptr, compiler_options.c_str(), nullptr, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to build program executable - ", CLErrorCodeToString(error_code),
-                   GetProgramBuildInfo(program, device.id(), CL_PROGRAM_BUILD_LOG)));
-  }
-
-  return absl::OkStatus();
-}
-
-std::string CompilerOptionToString(const CLDevice &device, CompilerOptions option)
-{
-  switch (option)
-  {
-    case CompilerOptions::ADRENO_FULL_SIMD_LINE:
-      if (device.info_.adreno_info.gpu_version < 500)
-      {
-        return "-qcom-accelerate-16-bit";
-      }
-      else
-      {
-        return "-qcom-accelerate-16-bit=true";
-      }
-    case CompilerOptions::ADRENO_MORE_WAVES:
-      if (device.info_.adreno_info.gpu_version >= 500)
-      {
-        return "-qcom-accelerate-16-bit=false";
-      }
-      else
-      {
-        return "";
-      }
-    case CompilerOptions::POWERVR_FP16:
-      return "-cl-fast-relaxed-math";
-    case CompilerOptions::CL_OPT_DISABLE:
-      return "-cl-opt-disable";
-    case CompilerOptions::CL_2_0:
-      return "-cl-std=CL2.0";
-    case CompilerOptions::CL_3_0:
-      return "-cl-std=CL3.0";
-  }
-  return "";
-}
-
-} // namespace
-
-std::string CompilerOptionsToString(const CLDevice &device,
-                                    const std::vector<CompilerOptions> &compiler_options)
-{
-  std::string result;
-  for (auto option : compiler_options)
-  {
-    absl::StrAppend(&result, CompilerOptionToString(device, option), " ");
-  }
-  return result;
-}
-
-CLProgram::CLProgram(cl_program program, cl_device_id device_id)
-  : program_(program), device_id_(device_id)
-{
-}
-
-CLProgram::CLProgram(CLProgram &&program)
-  : program_(program.program_), device_id_(program.device_id_)
-{
-  program.program_ = nullptr;
-}
-
-CLProgram &CLProgram::operator=(CLProgram &&program)
-{
-  if (this != &program)
-  {
-    Release();
-    std::swap(program_, program.program_);
-    std::swap(device_id_, program.device_id_);
-  }
-  return *this;
-}
-
-CLProgram::~CLProgram() { Release(); }
-
-void CLProgram::Release()
-{
-  if (program_)
-  {
-    clReleaseProgram(program_);
-    program_ = nullptr;
-  }
-}
-
-absl::Status CLProgram::GetBinary(std::vector<uint8_t> *result) const
-{
-  size_t binary_size;
-  RETURN_IF_ERROR(GetBinarySize(program_, &binary_size));
-  result->resize(result->size() + binary_size);
-  uint8_t *binary_ptr = result->data() + result->size() - binary_size;
-  cl_int error_code =
-    clGetProgramInfo(program_, CL_PROGRAM_BINARIES, binary_size, &binary_ptr, nullptr);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to get program binary - ", CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options,
-                             const CLContext &context, const CLDevice &device, CLProgram *result)
-{
-  int error_code;
-  const char *source = code.c_str();
-
-  cl_program program =
-    clCreateProgramWithSource(context.context(), 1, &source, nullptr, &error_code);
-  if (!program || error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create compute program - ", CLErrorCodeToString(error_code)));
-  }
-
-  *result = CLProgram(program, device.id());
-  RETURN_IF_ERROR(BuildProgram(program, device, compiler_options));
-  return absl::OkStatus();
-}
-
-absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device,
-                                       absl::Span<const uint8_t> binary, CLProgram *result)
-{
-  cl_int binary_status;
-  cl_int error_code;
-  cl_device_id devices_list[] = {device.id()};
-  size_t binary_size = binary.size();
-  const uint8_t *binary_pointer = binary.data();
-  cl_program program = clCreateProgramWithBinary(context.context(), 1, devices_list, &binary_size,
-                                                 &binary_pointer, &binary_status, &error_code);
-  if (binary_status != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat(
-      "Something wrong with binary after clCreateProgramWithBinary - ", binary_status));
-  }
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create program - ", CLErrorCodeToString(error_code)));
-  }
-  *result = CLProgram(program, device.id());
-  return BuildProgram(program, device, "");
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h
deleted file mode 100644 (file)
index d039ff6..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
-
-#include <cstdint>
-#include <vector>
-
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "OpenclWrapper.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class CompilerOptions
-{
-  // ADRENO_FULL_SIMD_LINE:
-  //   Adreno can have 2 sizes for SIMD size.
-  //   On Adreno 4xx/5xx it is 32/64, on Adreno6xx it is 64/128.
-  //   Some our algorithms actually rely on exact size, for example on full
-  //   SIMD size, so we need this define.
-  //   This define is actually -qcom-accelerate-16-bit, but it controls SIMD
-  //   size.
-  ADRENO_FULL_SIMD_LINE,
-  ADRENO_MORE_WAVES,
-  POWERVR_FP16,
-  CL_OPT_DISABLE,
-  CL_2_0,
-  CL_3_0,
-};
-
-std::string CompilerOptionsToString(const CLDevice &device,
-                                    const std::vector<CompilerOptions> &compiler_options);
-
-class CLProgram
-{
-public:
-  CLProgram() {}
-  CLProgram(cl_program program, cl_device_id device_id);
-
-  // Move only
-  CLProgram(CLProgram &&program);
-  CLProgram &operator=(CLProgram &&program);
-  CLProgram(const CLProgram &) = delete;
-  CLProgram &operator=(const CLProgram &) = delete;
-
-  ~CLProgram();
-
-  cl_program program() const { return program_; }
-
-  // Return the cl_device_id associated with the program object.
-  // This can be the device associated with context on which the program object
-  // has been created or can be device that was specified when a program object
-  // was created using clCreateProgramWithBinary.
-  cl_device_id GetDeviceId() const { return device_id_; }
-
-  absl::Status GetBinary(std::vector<uint8_t> *result) const;
-
-private:
-  void Release();
-
-  cl_program program_ = nullptr;
-
-  // reference
-  cl_device_id device_id_ = nullptr;
-};
-
-absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options,
-                             const CLContext &context, const CLDevice &device, CLProgram *result);
-
-absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device,
-                                       absl::Span<const uint8_t> binary, CLProgram *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc b/runtime/onert/backend/gpu_cl/open_cl/DataType.cc
deleted file mode 100644 (file)
index ce2aa82..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DataType.h"
-
-#include <stddef.h>
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-size_t SizeOf(DataType data_type)
-{
-  switch (data_type)
-  {
-    case DataType::UINT8:
-    case DataType::INT8:
-      return 1;
-    case DataType::FLOAT16:
-    case DataType::INT16:
-    case DataType::UINT16:
-      return 2;
-    case DataType::FLOAT32:
-    case DataType::INT32:
-    case DataType::UINT32:
-      return 4;
-    case DataType::FLOAT64:
-    case DataType::INT64:
-    case DataType::UINT64:
-      return 8;
-    case DataType::UNKNOWN:
-      return 0;
-  }
-  return 0;
-}
-
-std::string ToString(DataType data_type)
-{
-  switch (data_type)
-  {
-    case DataType::FLOAT16:
-      return "float16";
-    case DataType::FLOAT32:
-      return "float32";
-    case DataType::FLOAT64:
-      return "float64";
-    case DataType::INT16:
-      return "int16";
-    case DataType::INT32:
-      return "int32";
-    case DataType::INT64:
-      return "int64";
-    case DataType::INT8:
-      return "int8";
-    case DataType::UINT16:
-      return "uint16";
-    case DataType::UINT32:
-      return "uint32";
-    case DataType::UINT64:
-      return "uint64";
-    case DataType::UINT8:
-      return "uint8";
-    case DataType::UNKNOWN:
-      return "unknown";
-  }
-  return "undefined";
-}
-
-std::string ToCLDataType(DataType data_type, int vec_size)
-{
-  const std::string postfix = vec_size == 1 ? "" : std::to_string(vec_size);
-  switch (data_type)
-  {
-    case DataType::FLOAT16:
-      return "half" + postfix;
-    case DataType::FLOAT32:
-      return "float" + postfix;
-    case DataType::FLOAT64:
-      return "double" + postfix;
-    case DataType::INT16:
-      return "short" + postfix;
-    case DataType::INT32:
-      return "int" + postfix;
-    case DataType::INT64:
-      return "long" + postfix;
-    case DataType::INT8:
-      return "char" + postfix;
-    case DataType::UINT16:
-      return "ushort" + postfix;
-    case DataType::UINT32:
-      return "uint" + postfix;
-    case DataType::UINT64:
-      return "ulong" + postfix;
-    case DataType::UINT8:
-      return "uchar" + postfix;
-    case DataType::UNKNOWN:
-      return "unknown";
-  }
-  return "undefined";
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.h b/runtime/onert/backend/gpu_cl/open_cl/DataType.h
deleted file mode 100644 (file)
index 2a5afd5..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__
-
-#include <stddef.h>
-#include <string>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class DataType
-{
-  UNKNOWN = 0,
-  FLOAT16 = 1,
-  FLOAT32 = 2,
-  FLOAT64 = 3,
-  UINT8 = 4,
-  INT8 = 5,
-  UINT16 = 6,
-  INT16 = 7,
-  UINT32 = 8,
-  INT32 = 9,
-  UINT64 = 10,
-  INT64 = 11,
-};
-
-size_t SizeOf(DataType type);
-
-std::string ToString(DataType t);
-
-std::string ToCLDataType(DataType data_type, int vec_size = 1);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc
deleted file mode 100644 (file)
index 2966fad..0000000
+++ /dev/null
@@ -1,383 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DeviceInfo.h"
-
-#include <algorithm>
-#include <map>
-#include <string>
-#include <vector>
-
-#include "absl/strings/numbers.h"
-#include "absl/strings/str_split.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-// check that gpu_version belong to range min_version-max_version
-// min_version is included and max_version is excluded.
-bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version)
-{
-  return gpu_version >= min_version && gpu_version < max_version;
-}
-
-MaliGPU GetMaliGPUVersion(const std::string &device_name)
-{
-  const std::map<std::string, MaliGPU> kMapping = {
-    {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624},
-    {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678},
-    {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820},
-    {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880},
-    {"G31", MaliGPU::G31},   {"G51", MaliGPU::G51},   {"G71", MaliGPU::G71},
-    {"G52", MaliGPU::G52},   {"G72", MaliGPU::G72},   {"G76", MaliGPU::G76},
-    {"G57", MaliGPU::G57},   {"G77", MaliGPU::G77},   {"G68", MaliGPU::G68},
-    {"G78", MaliGPU::G78},
-  };
-  for (const auto &v : kMapping)
-  {
-    if (device_name.find(v.first) != std::string::npos)
-    {
-      return v.second;
-    }
-  }
-  return MaliGPU::UNKNOWN;
-}
-
-} // namespace
-
-// There is no rule for gpu version encoding, but we found these samples:
-// Version: OpenCL C 2.0 Adreno(TM) 540   // Pixel 2
-// Version: OpenCL C 2.0 Adreno(TM) 630   // Sony Compact XZ2
-// Version: OpenCL C 2.0 Adreno(TM) 630   // Pixel 3
-// Version: OpenCL C 2.0 Adreno(TM) 540   // Samsung S8
-// Version: OpenCL C 1.2 Adreno(TM) 430   // HTC One M9
-// Version: OpenCL C 2.0 Adreno(TM) 530   // Samsung S7 Edge
-// Version: OpenCL C 1.2 Adreno(TM) 405   // Motorola Moto G(4)
-// After the number string ends.
-// It is assumed that the <vendor-specific information> for Adreno GPUs has
-// the following format:
-// <text?><space?>Adreno(TM)<space><text?><version>
-// Returns -1 if vendor-specific information cannot be parsed
-int GetAdrenoGPUVersion(const std::string &gpu_version)
-{
-  const std::string gpu = absl::AsciiStrToLower(gpu_version);
-  const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' ');
-  size_t i = 0;
-  for (; i < words.size(); ++i)
-  {
-    if (words[i].find("adreno") != words[i].npos)
-    {
-      break;
-    }
-  }
-  i += 1;
-  for (; i < words.size(); ++i)
-  {
-    int number;
-    bool is_number = absl::SimpleAtoi(words[i], &number);
-    // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx
-    if (is_number && number >= 300)
-    {
-      return number;
-    }
-  }
-  return -1;
-}
-
-std::string VendorToString(Vendor v)
-{
-  switch (v)
-  {
-    case Vendor::kQualcomm:
-      return "Qualcomm";
-    case Vendor::kMali:
-      return "Mali";
-    case Vendor::kPowerVR:
-      return "PowerVR";
-    case Vendor::kNvidia:
-      return "NVIDIA";
-    case Vendor::kAMD:
-      return "AMD";
-    case Vendor::kIntel:
-      return "Intel";
-    case Vendor::kUnknown:
-      return "unknown vendor";
-    default:
-      return "Error";
-  }
-}
-
-std::string OpenCLVersionToString(OpenCLVersion version)
-{
-  switch (version)
-  {
-    case OpenCLVersion::CL_1_0:
-      return "1.0";
-    case OpenCLVersion::CL_1_1:
-      return "1.1";
-    case OpenCLVersion::CL_1_2:
-      return "1.2";
-    case OpenCLVersion::CL_2_0:
-      return "2.0";
-    case OpenCLVersion::CL_2_1:
-      return "2.1";
-    case OpenCLVersion::CL_2_2:
-      return "2.2";
-    case OpenCLVersion::CL_3_0:
-      return "3.0";
-    default:
-      return "Error";
-  }
-}
-
-AdrenoInfo::AdrenoInfo(const std::string &device_version)
-  : gpu_version(GetAdrenoGPUVersion(device_version))
-{
-}
-
-int AdrenoInfo::GetMaximumWavesCount() const
-{
-  if (gpu_version < 400)
-  {
-    return -1; // Adreno 3xx does not support it currently
-  }
-  else if (gpu_version >= 400 && gpu_version < 500)
-  {
-    return -1; // Adreno 4xx does not support it currently
-  }
-  else if (gpu_version >= 500 && gpu_version < 600)
-  {
-    return -1; // Adreno 5xx does not support it currently
-  }
-  else if (gpu_version >= 600 && gpu_version < 700)
-  {
-    return gpu_version == 640 ? 30 : 16;
-  }
-  else
-  {
-    return -1; //  Adreno 7xx and higher does not exist yet
-  }
-}
-
-int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const
-{
-  if (gpu_version < 400)
-  {
-    return -1; // Adreno 3xx does not support it currently
-  }
-  else if (gpu_version >= 400 && gpu_version < 500)
-  {
-    return -1; // Adreno 4xx does not support it currently
-  }
-  else if (gpu_version >= 500 && gpu_version < 600)
-  {
-    return -1; // Adreno 5xx does not support it currently
-  }
-  else if (gpu_version >= 600 && gpu_version < 700)
-  {
-    return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16;
-  }
-  else
-  {
-    return -1; //  Adreno 7xx and higher does not exist yet
-  }
-}
-
-int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, bool full_wave) const
-{
-  const int register_usage_per_wave = GetWaveSize(full_wave) * register_footprint_per_tread;
-  const int possible_waves_count = GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave;
-  return std::min(possible_waves_count, GetMaximumWavesCount());
-}
-
-int AdrenoInfo::GetWaveSize(bool full_wave) const
-{
-  if (gpu_version < 400)
-  {
-    return -1; // Adreno 3xx does not support it currently
-  }
-  else if (gpu_version < 600)
-  {
-    return full_wave ? 64 : 32;
-  }
-  else
-  {
-    return full_wave ? 128 : 64;
-  }
-}
-
-MaliInfo::MaliInfo(const std::string &device_name) : gpu_version(GetMaliGPUVersion(device_name)) {}
-
-bool MaliInfo::IsMaliT6xx() const
-{
-  return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 ||
-         gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 ||
-         gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678;
-}
-
-bool MaliInfo::IsMaliT7xx() const
-{
-  return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760;
-}
-
-bool MaliInfo::IsMaliT8xx() const
-{
-  return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 ||
-         gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880;
-}
-
-bool MaliInfo::IsMidgard() const { return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); }
-
-bool MaliInfo::IsBifrostGen1() const
-{
-  return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 || gpu_version == MaliGPU::G71;
-}
-
-bool MaliInfo::IsBifrostGen2() const
-{
-  return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72;
-}
-
-bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; }
-
-bool MaliInfo::IsBifrost() const { return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); }
-
-bool MaliInfo::IsValhall() const
-{
-  return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77 ||
-         gpu_version == MaliGPU::G68 || gpu_version == MaliGPU::G78;
-}
-
-bool DeviceInfo::SupportsTextureArray() const { return cl_version >= OpenCLVersion::CL_1_2; }
-
-bool DeviceInfo::SupportsImageBuffer() const { return cl_version >= OpenCLVersion::CL_1_2; }
-
-bool DeviceInfo::SupportsImage3D() const
-{
-  if (vendor == Vendor::kMali)
-  {
-    // On Mali T880 read_imageh doesn't compile with image3d_t
-    return false;
-  }
-  return supports_image3d_writes;
-}
-
-bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const
-{
-  if (channels == 1)
-  {
-    return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d : supports_r_f16_tex2d;
-  }
-  else if (channels == 2)
-  {
-    return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d : supports_rg_f16_tex2d;
-  }
-  else if (channels == 3)
-  {
-    return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d : supports_rgb_f16_tex2d;
-  }
-  else if (channels == 4)
-  {
-    return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d : supports_rgba_f16_tex2d;
-  }
-  else
-  {
-    return false;
-  }
-}
-
-bool DeviceInfo::SupportsOneLayerTextureArray() const
-{
-  return !IsAdreno() || adreno_info.support_one_layer_texture_array;
-}
-
-bool DeviceInfo::SupportsExtension(const std::string &extension) const
-{
-  for (const auto &ext : extensions)
-  {
-    if (ext == extension)
-    {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool DeviceInfo::IsCL20OrHigher() const
-{
-  return cl_version != OpenCLVersion::CL_1_0 && cl_version != OpenCLVersion::CL_1_1 &&
-         cl_version != OpenCLVersion::CL_1_2;
-}
-
-bool DeviceInfo::SupportsSubGroupWithSize(int sub_group_size) const
-{
-  for (auto subgroup_size : supported_subgroup_sizes)
-  {
-    if (sub_group_size == subgroup_size)
-    {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; }
-
-bool DeviceInfo::IsAdreno3xx() const
-{
-  return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400);
-}
-
-bool DeviceInfo::IsAdreno4xx() const
-{
-  return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500);
-}
-
-bool DeviceInfo::IsAdreno5xx() const
-{
-  return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600);
-}
-
-bool DeviceInfo::IsAdreno6xx() const
-{
-  return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700);
-}
-
-bool DeviceInfo::IsAdreno6xxOrHigher() const
-{
-  return IsAdreno() && adreno_info.gpu_version >= 600;
-}
-
-bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::kPowerVR; }
-
-bool DeviceInfo::IsNvidia() const { return vendor == Vendor::kNvidia; }
-
-bool DeviceInfo::IsMali() const { return vendor == Vendor::kMali; }
-
-bool DeviceInfo::IsAMD() const { return vendor == Vendor::kAMD; }
-
-bool DeviceInfo::IsIntel() const { return vendor == Vendor::kIntel; }
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h
deleted file mode 100644 (file)
index 85d7d4c..0000000
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
-
-#include <string>
-#include <vector>
-
-#include "DataType.h"
-
-// for use only in device_info.cc, but keep here to make tests
-int GetAdrenoGPUVersion(const std::string &gpu_version);
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class Vendor
-{
-  kQualcomm,
-  kMali,
-  kPowerVR,
-  kNvidia,
-  kAMD,
-  kIntel,
-  kUnknown
-};
-std::string VendorToString(Vendor v);
-
-enum class OpenCLVersion
-{
-  UNKNOWN,
-  CL_1_0,
-  CL_1_1,
-  CL_1_2,
-  CL_2_0,
-  CL_2_1,
-  CL_2_2,
-  CL_3_0
-};
-std::string OpenCLVersionToString(OpenCLVersion version);
-
-struct AdrenoInfo
-{
-  AdrenoInfo() = default;
-  explicit AdrenoInfo(const std::string &device_version);
-  int gpu_version = -1; // can be, for example, 405/430/540/530/630 etc.
-
-  // This function returns some not very documented physical parameter of
-  // Adreno6xx GPU.
-  // We obtained it using Snapdragon Profiler.
-  int GetMaximumWavesCount() const;
-
-  // returns amount of register memory per CU(Compute Unit) in bytes.
-  int GetRegisterMemorySizePerComputeUnit() const;
-
-  // returns maximum possible amount of waves based on register usage.
-  int GetMaximumWavesCount(int register_footprint_per_tread, bool full_wave = true) const;
-
-  int GetWaveSize(bool full_wave) const;
-
-  // Not supported on some Adreno devices with specific driver version.
-  // b/131099086
-  bool support_one_layer_texture_array = true;
-};
-
-enum class MaliGPU
-{
-  T604,
-  T622,
-  T624,
-  T628,
-  T658,
-  T678,
-  T720,
-  T760,
-  T820,
-  T830,
-  T860,
-  T880,
-  G31,
-  G51,
-  G71,
-  G52,
-  G72,
-  G76,
-  G57,
-  G77,
-  G68,
-  G78,
-  UNKNOWN
-};
-
-struct MaliInfo
-{
-  MaliInfo() = default;
-  explicit MaliInfo(const std::string &device_name);
-  MaliGPU gpu_version = MaliGPU::UNKNOWN;
-
-  bool IsMaliT6xx() const;
-  bool IsMaliT7xx() const;
-  bool IsMaliT8xx() const;
-  bool IsMidgard() const;
-  bool IsBifrostGen1() const;
-  bool IsBifrostGen2() const;
-  bool IsBifrostGen3() const;
-  bool IsBifrost() const;
-  bool IsValhall() const;
-};
-
-struct DeviceInfo
-{
-  DeviceInfo() = default;
-
-  bool IsAdreno() const;
-  bool IsAdreno3xx() const;
-  bool IsAdreno4xx() const;
-  bool IsAdreno5xx() const;
-  bool IsAdreno6xx() const;
-  bool IsAdreno6xxOrHigher() const;
-  bool IsPowerVR() const;
-  bool IsNvidia() const;
-  bool IsMali() const;
-  bool IsAMD() const;
-  bool IsIntel() const;
-
-  bool SupportsTextureArray() const;
-  bool SupportsImageBuffer() const;
-  bool SupportsImage3D() const;
-
-  bool SupportsFloatImage2D(DataType data_type, int channels) const;
-
-  // To track bug on some Adreno. b/131099086
-  bool SupportsOneLayerTextureArray() const;
-
-  bool SupportsExtension(const std::string &extension) const;
-  bool IsCL20OrHigher() const;
-  bool SupportsSubGroupWithSize(int sub_group_size) const;
-
-  std::vector<std::string> extensions;
-  bool supports_fp16 = false;
-  bool supports_image3d_writes = false;
-  Vendor vendor = Vendor::kUnknown;
-  OpenCLVersion cl_version = OpenCLVersion::UNKNOWN;
-  int compute_units_count = 0;
-  uint64_t buffer_max_size = 0;
-  uint64_t image2d_max_width = 0;
-  uint64_t image2d_max_height = 0;
-  uint64_t image_buffer_max_size = 0;
-  uint64_t image_array_max_layers = 0;
-  uint64_t image3d_max_width = 0;
-  uint64_t image3d_max_height = 0;
-  uint64_t image3d_max_depth = 0;
-  int max_work_group_size_x = 0;
-  int max_work_group_size_y = 0;
-  int max_work_group_size_z = 0;
-  std::vector<int> supported_subgroup_sizes;
-
-  // rtn is ROUND_TO_NEAREST
-  // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
-  // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
-  // Mali from T6xx supports rtn
-  // PowerVR supports only rtz
-  bool supports_fp32_rtn = false;
-  bool supports_fp16_rtn = false;
-
-  bool supports_r_f16_tex2d = false;
-  bool supports_rg_f16_tex2d = false;
-  bool supports_rgb_f16_tex2d = false;
-  bool supports_rgba_f16_tex2d = false;
-
-  bool supports_r_f32_tex2d = false;
-  bool supports_rg_f32_tex2d = false;
-  bool supports_rgb_f32_tex2d = false;
-  bool supports_rgba_f32_tex2d = false;
-
-  AdrenoInfo adreno_info;
-  MaliInfo mali_info;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc b/runtime/onert/backend/gpu_cl/open_cl/Environment.cc
deleted file mode 100644 (file)
index b558f03..0000000
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Environment.h"
-
-#include <string>
-#include <vector>
-
-#include "Util.h"
-#include "Shape.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-Environment::Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue,
-                         ProfilingCommandQueue &&profiling_queue)
-  : device_(std::move(device)), context_(std::move(context)), queue_(std::move(queue)),
-    profiling_queue_(std::move(profiling_queue))
-{
-}
-
-Environment::Environment(Environment &&environment)
-  : device_(std::move(environment.device_)), context_(std::move(environment.context_)),
-    queue_(std::move(environment.queue_)),
-    profiling_queue_(std::move(environment.profiling_queue_)),
-    program_cache_(std::move(environment.program_cache_))
-{
-}
-
-Environment &Environment::operator=(Environment &&environment)
-{
-  if (this != &environment)
-  {
-    device_ = std::move(environment.device_);
-    context_ = std::move(environment.context_);
-    queue_ = std::move(environment.queue_);
-    profiling_queue_ = std::move(environment.profiling_queue_);
-    program_cache_ = std::move(environment.program_cache_);
-  }
-  return *this;
-}
-
-absl::Status Environment::Init()
-{
-  if (device().IsAdreno() && device().SupportsTextureArray())
-  {
-    // Some Adreno < 600 have bug with one layer texture array. b/131099086
-    // If we have one layer texture array and will write smt from kernel to this
-    // texture, we will get zeroes instead of actual values.
-    // The same kernel will work, if we use texture array with more than one
-    // layer.
-    if (device().info_.adreno_info.gpu_version < 600)
-    {
-      GetDevicePtr()->DisableOneLayerTextureArray();
-    }
-  }
-  return absl::OkStatus();
-}
-
-void Environment::SetHighPerformance() const
-{
-  // TODO(sorokin) use cl_perf_hint if available
-}
-
-void Environment::SetDefaultPerformance() const
-{
-  // TODO(sorokin) use cl_perf_hint if available
-}
-
-void Environment::SetLowPerformance() const
-{
-  // TODO(sorokin) use cl_perf_hint if available
-}
-
-std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const
-{
-  std::vector<CalculationsPrecision> precisions;
-  for (CalculationsPrecision precision :
-       {CalculationsPrecision::F32, CalculationsPrecision::F32_F16, CalculationsPrecision::F16})
-  {
-    if (IsSupported(precision))
-    {
-      precisions.push_back(precision);
-    }
-  }
-  return precisions;
-}
-
-bool Environment::IsSupported(CalculationsPrecision precision) const
-{
-  switch (precision)
-  {
-    case CalculationsPrecision::F32_F16:
-    case CalculationsPrecision::F16:
-      return device_.SupportsFP16();
-    case CalculationsPrecision::F32:
-      return true;
-  }
-  return false;
-}
-
-std::vector<TensorStorageType> Environment::GetSupportedStorages() const
-{
-  std::vector<TensorStorageType> storage_types;
-  for (auto storage_type :
-       {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER, TensorStorageType::TEXTURE_ARRAY,
-        TensorStorageType::IMAGE_BUFFER, TensorStorageType::TEXTURE_3D})
-  {
-    if (IsSupported(storage_type))
-    {
-      storage_types.push_back(storage_type);
-    }
-  }
-  return storage_types;
-}
-
-std::vector<TensorStorageType> Environment::GetSupportedStoragesWithHWZeroClampSupport() const
-{
-  std::vector<TensorStorageType> storage_types;
-  for (auto storage_type : {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY,
-                            TensorStorageType::TEXTURE_3D})
-  {
-    if (IsSupported(storage_type))
-    {
-      storage_types.push_back(storage_type);
-    }
-  }
-  return storage_types;
-}
-
-bool Environment::IsSupported(TensorStorageType storage_type) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::TEXTURE_2D:
-      return !device_.IsAMD();
-    case TensorStorageType::BUFFER:
-      return true;
-    case TensorStorageType::TEXTURE_ARRAY:
-      return !device_.IsAMD() && device_.SupportsTextureArray();
-    case TensorStorageType::IMAGE_BUFFER:
-      return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) &&
-             device_.SupportsImageBuffer();
-    case TensorStorageType::TEXTURE_3D:
-      return !device_.IsAMD() && device_.SupportsImage3D();
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return false;
-    case TensorStorageType::UNKNOWN:
-      return false;
-  }
-  return false;
-}
-
-TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info)
-{
-  if (gpu_info.IsAdreno())
-  {
-    if (gpu_info.IsAdreno6xxOrHigher())
-    {
-      return TensorStorageType::TEXTURE_ARRAY;
-    }
-    else
-    {
-      return TensorStorageType::TEXTURE_2D;
-    }
-  }
-  else if (gpu_info.IsPowerVR())
-  {
-    return TensorStorageType::TEXTURE_2D;
-  }
-  else if (gpu_info.IsMali())
-  {
-    const MaliInfo mali_info = gpu_info.mali_info;
-    if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() || mali_info.IsValhall())
-    {
-      return TensorStorageType::TEXTURE_2D;
-    }
-    else
-    {
-      return TensorStorageType::BUFFER;
-    }
-  }
-  else if (gpu_info.IsNvidia())
-  {
-    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
-                                          : TensorStorageType::BUFFER;
-  }
-  else if (gpu_info.IsAMD())
-  {
-    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
-                                          : TensorStorageType::BUFFER;
-  }
-  else if (gpu_info.IsIntel())
-  {
-    return TensorStorageType::BUFFER;
-  }
-  return TensorStorageType::BUFFER;
-}
-
-TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info)
-{
-  if (gpu_info.IsAdreno())
-  {
-    if (gpu_info.IsAdreno3xx() || gpu_info.IsAdreno4xx())
-    {
-      return TensorStorageType::BUFFER;
-    }
-    else
-    {
-      return TensorStorageType::IMAGE_BUFFER;
-    }
-  }
-  else if (gpu_info.IsPowerVR())
-  {
-    return TensorStorageType::BUFFER;
-  }
-  else if (gpu_info.IsMali())
-  {
-    return TensorStorageType::BUFFER;
-  }
-  else if (gpu_info.IsNvidia())
-  {
-    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
-                                          : TensorStorageType::BUFFER;
-  }
-  else if (gpu_info.IsAMD())
-  {
-    return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER
-                                          : TensorStorageType::BUFFER;
-  }
-  else if (gpu_info.IsIntel())
-  {
-    return TensorStorageType::BUFFER;
-  }
-  return TensorStorageType::BUFFER;
-}
-
-absl::Status CreateEnvironment(Environment *result)
-{
-  CLDevice gpu;
-  RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
-
-  CLContext context;
-  RETURN_IF_ERROR(CreateCLContext(gpu, &context));
-  CLCommandQueue queue;
-  RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue));
-  ProfilingCommandQueue profiling_queue;
-  RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue));
-
-  *result =
-    Environment(std::move(gpu), std::move(context), std::move(queue), std::move(profiling_queue));
-  return result->Init();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.h b/runtime/onert/backend/gpu_cl/open_cl/Environment.h
deleted file mode 100644 (file)
index 47866b5..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__
-
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "DeviceInfo.h"
-#include "Precision.h"
-#include "TensorType.h"
-#include "DataType.h"
-#include "ProgramCache.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class Environment
-{
-public:
-  Environment() = default;
-  explicit Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue,
-                       ProfilingCommandQueue &&profiling_queue);
-  // Move only
-  Environment(Environment &&environment);
-  Environment &operator=(Environment &&environment);
-  Environment(const Environment &) = delete;
-  Environment &operator=(const Environment &) = delete;
-
-  const CLDevice &device() const { return device_; }
-  CLDevice *GetDevicePtr() { return &device_; }
-  const CLDevice *GetDevicePtr() const { return &device_; }
-  CLContext &context() { return context_; }
-  CLCommandQueue *queue() { return &queue_; }
-  ProfilingCommandQueue *profiling_queue() { return &profiling_queue_; }
-  ProgramCache *program_cache() { return &program_cache_; }
-  const ProgramCache *program_cache() const { return &program_cache_; }
-
-  std::vector<CalculationsPrecision> GetSupportedPrecisions() const;
-  bool IsSupported(CalculationsPrecision precision) const;
-  std::vector<TensorStorageType> GetSupportedStorages() const;
-  // returns storage types that support zero clamping when reading OOB in HW
-  // (Height/Width) dimensions.
-  std::vector<TensorStorageType> GetSupportedStoragesWithHWZeroClampSupport() const;
-  bool IsSupported(TensorStorageType storage_type) const;
-
-  absl::Status Init();
-
-  void SetHighPerformance() const;
-  void SetDefaultPerformance() const;
-  void SetLowPerformance() const; // for energy saving
-
-private:
-  CLDevice device_;
-  CLContext context_;
-  CLCommandQueue queue_;
-  ProfilingCommandQueue profiling_queue_;
-  ProgramCache program_cache_;
-};
-
-TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info);
-TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info);
-
-absl::Status CreateEnvironment(Environment *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc b/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc
deleted file mode 100644 (file)
index 774f815..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GpuObject.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string MemoryTypeToCLType(MemoryType type)
-{
-  switch (type)
-  {
-    case MemoryType::GLOBAL:
-      return "__global";
-    case MemoryType::CONSTANT:
-      return "__constant";
-      break;
-    case MemoryType::LOCAL:
-      return "__local";
-  }
-  return "";
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h b/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h
deleted file mode 100644 (file)
index a316302..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__
-
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "ClContext.h"
-#include "OpenclWrapper.h"
-#include "AccessType.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct GPUImage2DDescriptor
-{
-  DataType data_type = DataType::UNKNOWN;
-  AccessType access_type = AccessType::UNKNOWN;
-  cl_mem memory = nullptr;
-};
-
-struct GPUImage3DDescriptor
-{
-  DataType data_type = DataType::UNKNOWN;
-  AccessType access_type = AccessType::UNKNOWN;
-  cl_mem memory = nullptr;
-};
-
-struct GPUImage2DArrayDescriptor
-{
-  DataType data_type = DataType::UNKNOWN;
-  AccessType access_type = AccessType::UNKNOWN;
-  cl_mem memory = nullptr;
-};
-
-struct GPUImageBufferDescriptor
-{
-  DataType data_type = DataType::UNKNOWN;
-  AccessType access_type = AccessType::UNKNOWN;
-  cl_mem memory = nullptr;
-};
-
-struct GPUCustomMemoryDescriptor
-{
-  std::string type_name = "";
-  cl_mem memory = nullptr;
-};
-
-enum class MemoryType
-{
-  GLOBAL,
-  CONSTANT,
-  LOCAL
-};
-
-std::string MemoryTypeToCLType(MemoryType type);
-
-struct GPUBufferDescriptor
-{
-  DataType data_type = DataType::UNKNOWN;
-  AccessType access_type = AccessType::UNKNOWN;
-  int element_size = 0;
-  MemoryType memory_type = MemoryType::GLOBAL;
-  std::vector<std::string> attributes;
-  cl_mem memory = nullptr;
-};
-
-struct GPUResources
-{
-  std::vector<std::string> ints;
-  std::vector<std::string> floats;
-  std::vector<std::pair<std::string, GPUBufferDescriptor>> buffers;
-  std::vector<std::pair<std::string, GPUImage2DDescriptor>> images2d;
-  std::vector<std::pair<std::string, GPUImage2DArrayDescriptor>> image2d_arrays;
-  std::vector<std::pair<std::string, GPUImage3DDescriptor>> images3d;
-  std::vector<std::pair<std::string, GPUImageBufferDescriptor>> image_buffers;
-  std::vector<std::pair<std::string, GPUCustomMemoryDescriptor>> custom_memories;
-
-  std::vector<std::string> GetNames() const
-  {
-    std::vector<std::string> names = ints;
-    names.insert(names.end(), floats.begin(), floats.end());
-    for (const auto &obj : buffers)
-    {
-      names.push_back(obj.first);
-    }
-    for (const auto &obj : images2d)
-    {
-      names.push_back(obj.first);
-    }
-    for (const auto &obj : image2d_arrays)
-    {
-      names.push_back(obj.first);
-    }
-    for (const auto &obj : images3d)
-    {
-      names.push_back(obj.first);
-    }
-    for (const auto &obj : image_buffers)
-    {
-      names.push_back(obj.first);
-    }
-    for (const auto &obj : custom_memories)
-    {
-      names.push_back(obj.first);
-    }
-    return names;
-  }
-};
-
-struct GPUResourcesWithValue
-{
-  std::vector<std::pair<std::string, int>> ints;
-  std::vector<std::pair<std::string, float>> floats;
-  std::vector<std::pair<std::string, cl_mem>> buffers;
-  std::vector<std::pair<std::string, cl_mem>> images2d;
-  std::vector<std::pair<std::string, cl_mem>> image2d_arrays;
-  std::vector<std::pair<std::string, cl_mem>> images3d;
-  std::vector<std::pair<std::string, cl_mem>> image_buffers;
-  std::vector<std::pair<std::string, cl_mem>> custom_memories;
-};
-
-class GPUObject;
-
-class GPUObjectDescriptor
-{
-public:
-  GPUObjectDescriptor() = default;
-  GPUObjectDescriptor(const GPUObjectDescriptor &) = default;
-  GPUObjectDescriptor &operator=(const GPUObjectDescriptor &) = default;
-  GPUObjectDescriptor(GPUObjectDescriptor &&obj_desc) : state_vars_(std::move(obj_desc.state_vars_))
-  {
-  }
-  GPUObjectDescriptor &operator=(GPUObjectDescriptor &&obj_desc)
-  {
-    if (this != &obj_desc)
-    {
-      state_vars_ = std::move(obj_desc.state_vars_);
-    }
-    return *this;
-  }
-  virtual ~GPUObjectDescriptor() = default;
-
-  void SetStateVar(const std::string &key, const std::string &value) const
-  {
-    state_vars_[key] = value;
-  }
-
-  virtual std::string PerformConstExpr(const std::string &) const { return ""; }
-
-  virtual absl::Status PerformSelector(const std::string &, const std::vector<std::string> &,
-                                       const std::vector<std::string> &, std::string *result) const
-  {
-    *result = "";
-    return absl::OkStatus();
-  }
-  virtual GPUResources GetGPUResources() const { return GPUResources(); }
-
-  virtual absl::Status CreateGPUObject(CLContext *, std::unique_ptr<GPUObject> *) const
-  {
-    return absl::OkStatus();
-  }
-  virtual void Release() {}
-
-  void SetAccess(AccessType access_type) { access_type_ = access_type; }
-  AccessType GetAccess() const { return access_type_; }
-
-protected:
-  // friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode(
-  //     const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder);
-  // friend void Decode(const data::GPUObjectDescriptor* fb_obj,
-  //                    GPUObjectDescriptor* obj);
-  mutable std::map<std::string, std::string> state_vars_;
-  AccessType access_type_ = AccessType::UNKNOWN;
-};
-
-using GPUObjectDescriptorPtr = std::unique_ptr<GPUObjectDescriptor>;
-
-class GPUObject
-{
-public:
-  GPUObject() = default;
-  // Move only
-  GPUObject(GPUObject &&obj_desc) = default;
-  GPUObject &operator=(GPUObject &&obj_desc) = default;
-  GPUObject(const GPUObject &) = delete;
-  GPUObject &operator=(const GPUObject &) = delete;
-  virtual ~GPUObject() = default;
-  virtual absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                       GPUResourcesWithValue *resources) const = 0;
-};
-
-using GPUObjectPtr = std::unique_ptr<GPUObject>;
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc
deleted file mode 100644 (file)
index afb7e29..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "InferenceContext.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-#include <unordered_map>
-
-#include "Buffer.h"
-#include "ClDevice.h"
-
-#include "kernels/GpuOperation.h"
-#include "ModelHints.h"
-#include "Precision.h"
-#include "StorageTypeUtil.h"
-#include "TensorType.h"
-#include "DataType.h"
-#include "Model.h"
-#include "Operations.h"
-#include "Shape.h"
-#include "Types.h"
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-CLNode::CLNode(CLNode &&node)
-  : operation(std::move(node.operation)), inputs(std::move(node.inputs)),
-    outputs(std::move(node.outputs)), name(std::move(node.name))
-{
-}
-
-CLNode &CLNode::operator=(CLNode &&node)
-{
-  if (this != &node)
-  {
-    operation = std::move(node.operation);
-    inputs = std::move(node.inputs);
-    outputs = std::move(node.outputs);
-    name = std::move(node.name);
-  }
-  return *this;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h
deleted file mode 100644 (file)
index ebe2c53..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
-
-#include <cstdint>
-#include <functional>
-#include <map>
-#include <memory>
-#include <vector>
-#include <unordered_map>
-
-#include "Buffer.h"
-#include "ClCommandQueue.h"
-#include "Environment.h"
-#include "GpuObject.h"
-#include "kernels/GpuOperation.h"
-#include "ModelHints.h"
-#include "OpenclWrapper.h"
-#include "Precision.h"
-#include "TensorType.h"
-#include "Model.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct CLNode
-{
-  std::unique_ptr<GPUOperation> operation;
-  std::vector<ValueId> inputs;
-  std::vector<ValueId> outputs;
-
-  // Mostly for debug purposes.
-  std::string name;
-
-  CLNode() = default;
-
-  CLNode(CLNode &&node);
-  CLNode &operator=(CLNode &&node);
-  CLNode(const CLNode &) = delete;
-  CLNode &operator=(const CLNode &) = delete;
-};
-
-class InferenceContext
-{
-public:
-  struct CreateInferenceInfo
-  {
-    CalculationsPrecision precision;
-    TensorStorageType storage_type;
-    ModelHints hints;
-  };
-
-  struct DummyTensor
-  {
-    BHWC shape;
-    TensorDescriptor descriptor;
-
-    bool operator==(const DummyTensor &b) const
-    {
-      return shape == b.shape && descriptor == b.descriptor;
-    }
-  };
-
-  class TensorReserver
-  {
-  public:
-    ValueId Add(const std::shared_ptr<DummyTensor> dummy)
-    {
-      reservations_[next_] = std::move(dummy);
-      return next_++;
-    }
-    void Add(ValueId id, const std::shared_ptr<DummyTensor> dummy)
-    {
-      reservations_[id] = std::move(dummy);
-    }
-    void SetNext(ValueId id) { next_ = id; }
-    bool HaveTensor(ValueId id) { return reservations_.find(id) != reservations_.end(); }
-    std::shared_ptr<DummyTensor> Get(ValueId id) { return reservations_[id]; }
-
-    std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const
-    {
-      std::vector<std::pair<ValueId, TensorDescriptor>> result;
-      for (auto &v : reservations_)
-      {
-        TensorDescriptor desc = v.second->descriptor;
-        desc.shape.b = v.second->shape.b;
-        desc.shape.h = v.second->shape.h;
-        desc.shape.w = v.second->shape.w;
-        desc.shape.d = 1;
-        desc.shape.c = v.second->shape.c;
-        result.push_back({v.first, desc});
-      }
-      return result;
-    }
-
-    void Add(const std::vector<std::pair<ValueId, TensorDescriptor>> &tensors)
-    {
-      for (auto &v : tensors)
-      {
-        auto dummy = std::make_shared<DummyTensor>();
-        dummy->descriptor = v.second;
-        dummy->shape.b = v.second.shape.b;
-        dummy->shape.h = v.second.shape.h;
-        dummy->shape.w = v.second.shape.w;
-        dummy->shape.c = v.second.shape.c;
-        Add(v.first, dummy);
-      }
-    }
-
-  private:
-    std::unordered_map<ValueId, std::shared_ptr<DummyTensor>> reservations_;
-    ValueId next_ = 0;
-  };
-
-private:
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h b/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h
deleted file mode 100644 (file)
index f0423db..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
-
-#include <stdint.h>
-
-#include <vector>
-
-#include "DataType.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace internal_tensor
-{
-
-// Meta function given element type returns a type for Tensor data container.
-template <DataType Type> struct StorageType;
-
-template <> struct StorageType<DataType::FLOAT32>
-{
-  using value = std::vector<float>;
-};
-
-template <> struct StorageType<DataType::INT32>
-{
-  using value = std::vector<int32_t>;
-};
-
-} // namespace internal_tensor
-
-template <typename ShapeT, DataType Type> struct InternalTensor
-{
-  using ShapeType = ShapeT;
-
-  constexpr static DataType kType = Type;
-
-  using TensorStorageType = typename internal_tensor::StorageType<Type>::value;
-
-  // Opaque id of a tensor.
-  int64_t id = -1;
-
-  ShapeType shape;
-
-  TensorStorageType data;
-};
-
-// TensorRef is a reference to another tensor. If an object should never hold
-// tensor data, then TensorRef should be used instead.
-template <typename ShapeT> struct TensorRef
-{
-  using ShapeType = ShapeT;
-
-  DataType type = DataType::UNKNOWN;
-
-  ShapeT shape;
-
-  // Opaque reference to a tensor. Upstream component is responsible for
-  // resolving this reference into an actual tensor.
-  int64_t ref = -1;
-
-  // Specifies if the tensor should be a variable input tensor that must be an
-  // output as well as an input to the graph.
-  bool is_variable_input = false;
-};
-
-template <typename ShapeT, DataType Type> constexpr DataType InternalTensor<ShapeT, Type>::kType;
-
-template <typename ShapeT, DataType Type>
-InternalTensor<ShapeT, Type> MakeZeroTensor(const ShapeT &shape)
-{
-  InternalTensor<ShapeT, Type> tensor;
-  tensor.shape = shape;
-  tensor.data =
-    typename InternalTensor<ShapeT, Type>::TensorStorageType(shape.DimensionsProduct(), 0);
-  return tensor;
-}
-
-using TensorFloat32 = InternalTensor<BHWC, DataType::FLOAT32>;
-using Tensor5DFloat32 = InternalTensor<BHWDC, DataType::FLOAT32>;
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc
deleted file mode 100644 (file)
index 3889d43..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "LinearStorage.h"
-
-#include "absl/strings/str_cat.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor &&desc)
-  : GPUObjectDescriptor(std::move(desc)), storage_type(desc.storage_type),
-    element_type(desc.element_type), memory_type(desc.memory_type), size(desc.size),
-    data(std::move(desc.data))
-{
-}
-
-TensorLinearDescriptor &TensorLinearDescriptor::operator=(TensorLinearDescriptor &&desc)
-{
-  if (this != &desc)
-  {
-    std::swap(storage_type, desc.storage_type);
-    std::swap(element_type, desc.element_type);
-    std::swap(memory_type, desc.memory_type);
-    std::swap(size, desc.size);
-    data = std::move(desc.data);
-    GPUObjectDescriptor::operator=(std::move(desc));
-  }
-  return *this;
-}
-
-void TensorLinearDescriptor::Release() { data.clear(); }
-
-GPUResources TensorLinearDescriptor::GetGPUResources() const
-{
-  GPUResources resources;
-  resources.ints.push_back("length");
-  if (storage_type == LinearStorageType::BUFFER)
-  {
-    GPUBufferDescriptor desc;
-    desc.data_type = element_type;
-    desc.access_type = access_type_;
-    desc.element_size = 4;
-    desc.memory_type = memory_type;
-    resources.buffers.push_back({"buffer", desc});
-  }
-  else
-  {
-    GPUImage2DDescriptor desc;
-    desc.data_type = element_type;
-    desc.access_type = access_type_;
-    resources.images2d.push_back({"tex2d", desc});
-  }
-  return resources;
-}
-
-absl::Status TensorLinearDescriptor::PerformSelector(const std::string &selector,
-                                                     const std::vector<std::string> &args,
-                                                     const std::vector<std::string> &,
-                                                     std::string *result) const
-{
-  if (selector == "Length")
-  {
-    *result = "length";
-    return absl::OkStatus();
-  }
-  else if (selector == "Read")
-  {
-    return PerformReadSelector(args, result);
-  }
-  else if (selector == "GetPtr")
-  {
-    if (storage_type != LinearStorageType::BUFFER)
-    {
-      return absl::InvalidArgumentError(
-        "GetPtr selector supported for LinearStorageType::BUFFER only.");
-    }
-    *result = "buffer";
-    return absl::OkStatus();
-  }
-  else
-  {
-    return absl::NotFoundError(
-      absl::StrCat("TensorLinearDescriptor don't have selector with name - ", selector));
-  }
-}
-
-absl::Status TensorLinearDescriptor::PerformReadSelector(const std::vector<std::string> &args,
-                                                         std::string *result) const
-{
-  if (args.size() != 1)
-  {
-    return absl::NotFoundError(absl::StrCat(
-      "TensorLinearDescriptor Read require one argument, but ", args.size(), " was passed"));
-  }
-  if (storage_type == LinearStorageType::BUFFER)
-  {
-    *result = absl::StrCat("buffer[", args[0], "]");
-    return absl::OkStatus();
-  }
-  else
-  {
-    const std::string read = element_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-    *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", 0))");
-    return absl::OkStatus();
-  }
-}
-
-absl::Status TensorLinearDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
-  LinearStorage gpu_storage;
-  RETURN_IF_ERROR(gpu_storage.CreateFromTensorLinearDescriptor(*this, context));
-  *result = absl::make_unique<LinearStorage>(std::move(gpu_storage));
-  return absl::OkStatus();
-}
-
-void TensorLinearDescriptor::UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src,
-                                              int aligned_size)
-{
-  size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size;
-  if (element_type == DataType::FLOAT32)
-  {
-    data.resize(size * sizeof(float) * 4);
-    float *gpu_data = reinterpret_cast<float *>(data.data());
-    for (int i = 0; i < size * 4; ++i)
-    {
-      if (i < src.shape.v)
-      {
-        gpu_data[i] = src.data[i];
-      }
-      else
-      {
-        gpu_data[i] = 0.0f;
-      }
-    }
-  }
-  // TODO
-  // It doesn't support F16 yet. I will try to add it later.
-  //
-  // else {
-  //   data.resize(size * sizeof(half) * 4);
-  //   half* gpu_data = reinterpret_cast<half*>(data.data());
-  //   for (int i = 0; i < size * 4; ++i) {
-  //     if (i < src.shape.v) {
-  //       gpu_data[i] = src.data[i];
-  //     } else {
-  //       gpu_data[i] = 0.0f;
-  //     }
-  //   }
-  // }
-}
-
-void LinearStorage::Release()
-{
-  if (memory_)
-  {
-    clReleaseMemObject(memory_);
-    memory_ = nullptr;
-  }
-}
-
-LinearStorage::LinearStorage(LinearStorage &&storage)
-  : GPUObject(std::move(storage)), memory_(storage.memory_), depth_(storage.depth_),
-    storage_type_(storage.storage_type_)
-{
-  storage.memory_ = nullptr;
-}
-
-LinearStorage &LinearStorage::operator=(LinearStorage &&storage)
-{
-  if (this != &storage)
-  {
-    Release();
-    std::swap(memory_, storage.memory_);
-    std::swap(depth_, storage.depth_);
-    std::swap(storage_type_, storage.storage_type_);
-    GPUObject::operator=(std::move(storage));
-  }
-  return *this;
-}
-
-absl::Status LinearStorage::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                            GPUResourcesWithValue *resources) const
-{
-  const auto *linear_desc = dynamic_cast<const TensorLinearDescriptor *>(obj_ptr);
-  if (!linear_desc)
-  {
-    return absl::InvalidArgumentError("Expected TensorLinearDescriptor on input.");
-  }
-
-  resources->ints.push_back({"length", depth_});
-
-  if (storage_type_ == LinearStorageType::BUFFER)
-  {
-    resources->buffers.push_back({"buffer", memory_});
-  }
-  else
-  {
-    resources->images2d.push_back({"tex2d", memory_});
-  }
-
-  return absl::OkStatus();
-}
-
-absl::Status LinearStorage::CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc,
-                                                             CLContext *context)
-{
-  storage_type_ = desc.storage_type;
-  depth_ = desc.size;
-  uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
-  if (storage_type_ == LinearStorageType::BUFFER)
-  {
-    bool read_only = desc.memory_type == MemoryType::CONSTANT;
-    uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
-    // TODO
-    // It doesn't support F16 yet. I will try to add it later.
-    //
-    // const int float4_size = desc.element_type == DataType::FLOAT32
-    //                             ? sizeof(float) * 4
-    //                             : sizeof(half) * 4;
-    const int float4_size = sizeof(float) * 4;
-    return CreateCLBuffer(context->context(), depth_ * float4_size, read_only, data_ptr, &memory_);
-  }
-  else
-  {
-    return CreateRGBAImage2D(context->context(), depth_, 1,
-                             DataTypeToChannelType(desc.element_type), data_ptr, &memory_);
-  }
-}
-
-LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type)
-{
-  if (tensor_storage_type == TensorStorageType::BUFFER)
-  {
-    return LinearStorageType::BUFFER;
-  }
-  else
-  {
-    return LinearStorageType::TEXTURE_2D;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h
deleted file mode 100644 (file)
index f6c3ac8..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__
-
-#include <string>
-#include <utility>
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "TensorType.h"
-#include "Util.h"
-#include "DataType.h"
-#include "Status.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class LinearStorageType
-{
-  BUFFER,
-  TEXTURE_2D
-};
-
-struct TensorLinearDescriptor : public GPUObjectDescriptor
-{
-  LinearStorageType storage_type;
-  DataType element_type;                       // FLOAT32 or FLOAT16
-  MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER
-
-  // optional
-  int size = 0;
-  std::vector<uint8_t> data;
-
-  TensorLinearDescriptor() = default;
-  TensorLinearDescriptor(const TensorLinearDescriptor &) = default;
-  TensorLinearDescriptor &operator=(const TensorLinearDescriptor &) = default;
-  TensorLinearDescriptor(TensorLinearDescriptor &&desc);
-  TensorLinearDescriptor &operator=(TensorLinearDescriptor &&desc);
-
-  void UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src, int aligned_size = 0);
-
-  absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
-                               const std::vector<std::string> &template_args,
-                               std::string *result) const override;
-
-  GPUResources GetGPUResources() const override;
-  absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
-
-  absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
-  void Release() override;
-};
-
-LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type);
-
-// Represent GPU 1D-array of FLT4(float4/half4) values
-// Can use inside texture2d or buffer
-class LinearStorage : public GPUObject
-{
-public:
-  LinearStorage() {}
-  ~LinearStorage() override { Release(); }
-
-  // Move only
-  LinearStorage(LinearStorage &&storage);
-  LinearStorage &operator=(LinearStorage &&storage);
-  LinearStorage(const LinearStorage &) = delete;
-  LinearStorage &operator=(const LinearStorage &) = delete;
-
-  absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                               GPUResourcesWithValue *resources) const override;
-
-  absl::Status CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc,
-                                                CLContext *context);
-
-private:
-  void Release();
-
-  cl_mem memory_ = nullptr;
-  int depth_;
-  LinearStorageType storage_type_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Model.h b/runtime/onert/backend/gpu_cl/open_cl/Model.h
deleted file mode 100644 (file)
index f434bb2..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
-
-#include <string>
-
-#include "absl/types/any.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// There is yet another representation of CNN graph. The primary purpose of this
-// representation is to simplify graph manipulation.
-
-using ValueId = uint32_t;
-
-// Used to emulate quantized behavior.
-struct QuantizationParams
-{
-  float min = 0;
-  float max = 0;
-  float scale = 0;
-};
-
-struct Operation
-{
-  std::string type;
-  absl::any attributes;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h b/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h
deleted file mode 100644 (file)
index 474c56b..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
-
-#include <cstdint>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct ModelHints
-{
-  using ModelHint = uint64_t;
-
-  // By default we want the fastest inference.
-  static constexpr ModelHint kFastestInference = 0x00000000;
-  // Can improve compilation time, but inference can be slower.
-  static constexpr ModelHint kReduceKernelsCount = 0x00000001;
-  // Can improve tuning time, but inference can be slower.
-  static constexpr ModelHint kFastTuning = 0x00000002;
-
-  // Experimental.
-  // Can improve performance and memory consumption, but slow down
-  // initialization a lot and create more kernels.
-  static constexpr ModelHint kAllowSpecialKernels = 0x00000004;
-
-  void Add(ModelHint hint)
-  {
-    if (hint == kFastestInference)
-    {
-      hints = kFastestInference;
-    }
-    else
-    {
-      hints |= hint;
-    }
-  }
-
-  bool Check(ModelHint hint) const { return hints & hint; }
-
-  uint64_t hints = kFastestInference;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc
deleted file mode 100644 (file)
index dbaf6fa..0000000
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#if defined(_WIN32)
-#define __WINDOWS__
-#endif
-
-#include "OpenclWrapper.h"
-
-#ifdef __WINDOWS__
-#include <windows.h>
-#else
-#include <dlfcn.h>
-#endif
-
-#include <string>
-
-#include "absl/strings/str_cat.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-#ifdef __ANDROID__
-#define LoadFunction(function)                                                 \
-  if (use_wrapper)                                                             \
-  {                                                                            \
-    function = reinterpret_cast<PFN_##function>(loadOpenCLPointer(#function)); \
-  }                                                                            \
-  else                                                                         \
-  {                                                                            \
-    function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function)); \
-  }
-#elif defined(__WINDOWS__)
-#define LoadFunction(function) \
-  function = reinterpret_cast<PFN_##function>(GetProcAddress(libopencl, #function));
-#else
-#define LoadFunction(function) \
-  function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function));
-#endif
-
-#ifdef __WINDOWS__
-void LoadOpenCLFunctions(HMODULE libopencl);
-#else
-void LoadOpenCLFunctions(void **libopencl, bool use_wrapper);
-#endif
-
-absl::Status LoadOpenCL(void **libopencl)
-{
-#ifdef __WINDOWS__
-  HMODULE libopencl = LoadLibraryA("OpenCL.dll");
-  if (libopencl)
-  {
-    LoadOpenCLFunctions(libopencl);
-    return absl::OkStatus();
-  }
-  else
-  {
-    DWORD error_code = GetLastError();
-    return absl::UnknownError(
-      absl::StrCat("Can not open OpenCL library on this device, error code - ", error_code));
-  }
-#else
-  *libopencl = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL);
-  if (*libopencl)
-  {
-    LoadOpenCLFunctions(libopencl, false);
-    return absl::OkStatus();
-  }
-  // record error
-  std::string error(dlerror());
-#ifdef __ANDROID__
-  // Pixel phone or auto?
-  *libopencl = dlopen("libOpenCL-pixel.so", RTLD_NOW | RTLD_LOCAL);
-  if (!*libopencl)
-  {
-    *libopencl = dlopen("libOpenCL-car.so", RTLD_NOW | RTLD_LOCAL);
-  }
-  if (*libopencl)
-  {
-    typedef void (*enableOpenCL_t)();
-    enableOpenCL_t enableOpenCL =
-      reinterpret_cast<enableOpenCL_t>(dlsym(*libopencl, "enableOpenCL"));
-    enableOpenCL();
-    LoadOpenCLFunctions(libopencl, true);
-    return absl::OkStatus();
-  }
-#endif
-  return absl::UnknownError(absl::StrCat("Can not open OpenCL library on this device - ", error));
-#endif
-}
-
-void UnloadOpenCL(void *libopencl)
-{
-  if (libopencl)
-  {
-    dlclose(libopencl);
-  }
-}
-
-#ifdef __WINDOWS__
-void LoadOpenCLFunctions(HMODULE libopencl)
-{
-#else
-#ifdef __ANDROID__
-void LoadOpenCLFunctions(void **libopencl, bool use_wrapper)
-{
-  typedef void *(*loadOpenCLPointer_t)(const char *name);
-  loadOpenCLPointer_t loadOpenCLPointer;
-  if (use_wrapper)
-  {
-    loadOpenCLPointer =
-      reinterpret_cast<loadOpenCLPointer_t>(dlsym(*libopencl, "loadOpenCLPointer"));
-  }
-#else
-void LoadOpenCLFunctions(void **libopencl, bool)
-{
-#endif // __ANDROID__
-#endif // __WINDOWS__
-
-  LoadFunction(clGetPlatformIDs);
-  LoadFunction(clGetPlatformInfo);
-  LoadFunction(clGetDeviceIDs);
-  LoadFunction(clGetDeviceInfo);
-  LoadFunction(clCreateSubDevices);
-  LoadFunction(clRetainDevice);
-  LoadFunction(clReleaseDevice);
-  LoadFunction(clCreateContext);
-  LoadFunction(clCreateContextFromType);
-  LoadFunction(clRetainContext);
-  LoadFunction(clReleaseContext);
-  LoadFunction(clGetContextInfo);
-  LoadFunction(clCreateCommandQueueWithProperties);
-  LoadFunction(clRetainCommandQueue);
-  LoadFunction(clReleaseCommandQueue);
-  LoadFunction(clGetCommandQueueInfo);
-  LoadFunction(clCreateBuffer);
-  LoadFunction(clCreateSubBuffer);
-  LoadFunction(clCreateImage);
-  LoadFunction(clCreatePipe);
-  LoadFunction(clRetainMemObject);
-  LoadFunction(clReleaseMemObject);
-  LoadFunction(clGetSupportedImageFormats);
-  LoadFunction(clGetMemObjectInfo);
-  LoadFunction(clGetImageInfo);
-  LoadFunction(clGetPipeInfo);
-  LoadFunction(clSetMemObjectDestructorCallback);
-  LoadFunction(clSVMAlloc);
-  LoadFunction(clSVMFree);
-  LoadFunction(clCreateSamplerWithProperties);
-  LoadFunction(clRetainSampler);
-  LoadFunction(clReleaseSampler);
-  LoadFunction(clGetSamplerInfo);
-  LoadFunction(clCreateProgramWithSource);
-  LoadFunction(clCreateProgramWithBinary);
-  LoadFunction(clCreateProgramWithBuiltInKernels);
-  LoadFunction(clRetainProgram);
-  LoadFunction(clReleaseProgram);
-  LoadFunction(clBuildProgram);
-  LoadFunction(clCompileProgram);
-  LoadFunction(clLinkProgram);
-  LoadFunction(clUnloadPlatformCompiler);
-  LoadFunction(clGetProgramInfo);
-  LoadFunction(clGetProgramBuildInfo);
-  LoadFunction(clCreateKernel);
-  LoadFunction(clCreateKernelsInProgram);
-  LoadFunction(clRetainKernel);
-  LoadFunction(clReleaseKernel);
-  LoadFunction(clSetKernelArg);
-  LoadFunction(clSetKernelArgSVMPointer);
-  LoadFunction(clSetKernelExecInfo);
-  LoadFunction(clGetKernelInfo);
-  LoadFunction(clGetKernelArgInfo);
-  LoadFunction(clGetKernelWorkGroupInfo);
-  LoadFunction(clWaitForEvents);
-  LoadFunction(clGetEventInfo);
-  LoadFunction(clCreateUserEvent);
-  LoadFunction(clRetainEvent);
-  LoadFunction(clReleaseEvent);
-  LoadFunction(clSetUserEventStatus);
-  LoadFunction(clSetEventCallback);
-  LoadFunction(clGetEventProfilingInfo);
-  LoadFunction(clFlush);
-  LoadFunction(clFinish);
-  LoadFunction(clEnqueueReadBuffer);
-  LoadFunction(clEnqueueReadBufferRect);
-  LoadFunction(clEnqueueWriteBuffer);
-  LoadFunction(clEnqueueWriteBufferRect);
-  LoadFunction(clEnqueueFillBuffer);
-  LoadFunction(clEnqueueCopyBuffer);
-  LoadFunction(clEnqueueCopyBufferRect);
-  LoadFunction(clEnqueueReadImage);
-  LoadFunction(clEnqueueWriteImage);
-  LoadFunction(clEnqueueFillImage);
-  LoadFunction(clEnqueueCopyImage);
-  LoadFunction(clEnqueueCopyImageToBuffer);
-  LoadFunction(clEnqueueCopyBufferToImage);
-  LoadFunction(clEnqueueMapBuffer);
-  LoadFunction(clEnqueueMapImage);
-  LoadFunction(clEnqueueUnmapMemObject);
-  LoadFunction(clEnqueueMigrateMemObjects);
-  LoadFunction(clEnqueueNDRangeKernel);
-  LoadFunction(clEnqueueNativeKernel);
-  LoadFunction(clEnqueueMarkerWithWaitList);
-  LoadFunction(clEnqueueBarrierWithWaitList);
-  LoadFunction(clEnqueueSVMFree);
-  LoadFunction(clEnqueueSVMMemcpy);
-  LoadFunction(clEnqueueSVMMemFill);
-  LoadFunction(clEnqueueSVMMap);
-  LoadFunction(clEnqueueSVMUnmap);
-  LoadFunction(clGetExtensionFunctionAddressForPlatform);
-  LoadFunction(clCreateImage2D);
-  LoadFunction(clCreateImage3D);
-  LoadFunction(clEnqueueMarker);
-  LoadFunction(clEnqueueWaitForEvents);
-  LoadFunction(clEnqueueBarrier);
-  LoadFunction(clUnloadCompiler);
-  LoadFunction(clGetExtensionFunctionAddress);
-  LoadFunction(clCreateCommandQueue);
-  LoadFunction(clCreateSampler);
-  LoadFunction(clEnqueueTask);
-
-  // OpenGL sharing
-  LoadFunction(clCreateFromGLBuffer);
-  LoadFunction(clCreateFromGLTexture);
-  LoadFunction(clEnqueueAcquireGLObjects);
-  LoadFunction(clEnqueueReleaseGLObjects);
-
-  // cl_khr_egl_event extension
-  LoadFunction(clCreateEventFromEGLSyncKHR);
-
-  // EGL sharing
-  LoadFunction(clCreateFromEGLImageKHR);
-  LoadFunction(clEnqueueAcquireEGLObjectsKHR);
-  LoadFunction(clEnqueueReleaseEGLObjectsKHR);
-} // namespace gpu_cl
-
-// No OpenCL support, do not set function addresses
-PFN_clGetPlatformIDs clGetPlatformIDs;
-PFN_clGetPlatformInfo clGetPlatformInfo;
-PFN_clGetDeviceIDs clGetDeviceIDs;
-PFN_clGetDeviceInfo clGetDeviceInfo;
-PFN_clCreateSubDevices clCreateSubDevices;
-PFN_clRetainDevice clRetainDevice;
-PFN_clReleaseDevice clReleaseDevice;
-PFN_clCreateContext clCreateContext;
-PFN_clCreateContextFromType clCreateContextFromType;
-PFN_clRetainContext clRetainContext;
-PFN_clReleaseContext clReleaseContext;
-PFN_clGetContextInfo clGetContextInfo;
-PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties;
-PFN_clRetainCommandQueue clRetainCommandQueue;
-PFN_clReleaseCommandQueue clReleaseCommandQueue;
-PFN_clGetCommandQueueInfo clGetCommandQueueInfo;
-PFN_clCreateBuffer clCreateBuffer;
-PFN_clCreateSubBuffer clCreateSubBuffer;
-PFN_clCreateImage clCreateImage;
-PFN_clCreatePipe clCreatePipe;
-PFN_clRetainMemObject clRetainMemObject;
-PFN_clReleaseMemObject clReleaseMemObject;
-PFN_clGetSupportedImageFormats clGetSupportedImageFormats;
-PFN_clGetMemObjectInfo clGetMemObjectInfo;
-PFN_clGetImageInfo clGetImageInfo;
-PFN_clGetPipeInfo clGetPipeInfo;
-PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback;
-PFN_clSVMAlloc clSVMAlloc;
-PFN_clSVMFree clSVMFree;
-PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties;
-PFN_clRetainSampler clRetainSampler;
-PFN_clReleaseSampler clReleaseSampler;
-PFN_clGetSamplerInfo clGetSamplerInfo;
-PFN_clCreateProgramWithSource clCreateProgramWithSource;
-PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
-PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels;
-PFN_clRetainProgram clRetainProgram;
-PFN_clReleaseProgram clReleaseProgram;
-PFN_clBuildProgram clBuildProgram;
-PFN_clCompileProgram clCompileProgram;
-PFN_clLinkProgram clLinkProgram;
-PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler;
-PFN_clGetProgramInfo clGetProgramInfo;
-PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
-PFN_clCreateKernel clCreateKernel;
-PFN_clCreateKernelsInProgram clCreateKernelsInProgram;
-PFN_clRetainKernel clRetainKernel;
-PFN_clReleaseKernel clReleaseKernel;
-PFN_clSetKernelArg clSetKernelArg;
-PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer;
-PFN_clSetKernelExecInfo clSetKernelExecInfo;
-PFN_clGetKernelInfo clGetKernelInfo;
-PFN_clGetKernelArgInfo clGetKernelArgInfo;
-PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo;
-PFN_clWaitForEvents clWaitForEvents;
-PFN_clGetEventInfo clGetEventInfo;
-PFN_clCreateUserEvent clCreateUserEvent;
-PFN_clRetainEvent clRetainEvent;
-PFN_clReleaseEvent clReleaseEvent;
-PFN_clSetUserEventStatus clSetUserEventStatus;
-PFN_clSetEventCallback clSetEventCallback;
-PFN_clGetEventProfilingInfo clGetEventProfilingInfo;
-PFN_clFlush clFlush;
-PFN_clFinish clFinish;
-PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
-PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect;
-PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
-PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect;
-PFN_clEnqueueFillBuffer clEnqueueFillBuffer;
-PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer;
-PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect;
-PFN_clEnqueueReadImage clEnqueueReadImage;
-PFN_clEnqueueWriteImage clEnqueueWriteImage;
-PFN_clEnqueueFillImage clEnqueueFillImage;
-PFN_clEnqueueCopyImage clEnqueueCopyImage;
-PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer;
-PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage;
-PFN_clEnqueueMapBuffer clEnqueueMapBuffer;
-PFN_clEnqueueMapImage clEnqueueMapImage;
-PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject;
-PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects;
-PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel;
-PFN_clEnqueueNativeKernel clEnqueueNativeKernel;
-PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList;
-PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList;
-PFN_clEnqueueSVMFree clEnqueueSVMFree;
-PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy;
-PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill;
-PFN_clEnqueueSVMMap clEnqueueSVMMap;
-PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap;
-PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform;
-PFN_clCreateImage2D clCreateImage2D;
-PFN_clCreateImage3D clCreateImage3D;
-PFN_clEnqueueMarker clEnqueueMarker;
-PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents;
-PFN_clEnqueueBarrier clEnqueueBarrier;
-PFN_clUnloadCompiler clUnloadCompiler;
-PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress;
-PFN_clCreateCommandQueue clCreateCommandQueue;
-PFN_clCreateSampler clCreateSampler;
-PFN_clEnqueueTask clEnqueueTask;
-
-// OpenGL sharing
-PFN_clCreateFromGLBuffer clCreateFromGLBuffer;
-PFN_clCreateFromGLTexture clCreateFromGLTexture;
-PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects;
-PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects;
-
-// cl_khr_egl_event extension
-PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR;
-
-// EGL sharing
-PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
-PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
-PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
-
-cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
-                           const cl_image_format *image_format, const cl_image_desc *image_desc,
-                           void *host_ptr, cl_int *errcode_ret)
-{
-  if (clCreateImage)
-  { // clCreateImage available since OpenCL 1.2
-    return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret);
-  }
-  else
-  {
-    return clCreateImage2D(context, flags, image_format, image_desc->image_width,
-                           image_desc->image_height, image_desc->image_row_pitch, host_ptr,
-                           errcode_ret);
-  }
-}
-
-cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags,
-                           const cl_image_format *image_format, const cl_image_desc *image_desc,
-                           void *host_ptr, cl_int *errcode_ret)
-{
-  if (clCreateImage)
-  { // clCreateImage available since OpenCL 1.2
-    return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret);
-  }
-  else
-  {
-    return clCreateImage3D(context, flags, image_format, image_desc->image_width,
-                           image_desc->image_height, image_desc->image_depth,
-                           image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr,
-                           errcode_ret);
-  }
-}
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h
deleted file mode 100644 (file)
index 021f873..0000000
+++ /dev/null
@@ -1,560 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__
-
-#include "CL/cl.h"
-#include "CL/cl_egl.h"
-#include "CL/cl_ext.h"
-#include "CL/cl_gl.h"
-#include "CL/cl_platform.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-absl::Status LoadOpenCL(void **libopencl);
-void UnloadOpenCL(void *libopencl);
-
-typedef cl_int(CL_API_CALL *PFN_clGetPlatformIDs)(
-  cl_uint /* num_entries */, cl_platform_id * /* platforms */,
-  cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetPlatformInfo)(
-  cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetDeviceIDs)(
-  cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */,
-  cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetDeviceInfo)(
-  cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clCreateSubDevices)(
-  cl_device_id /* in_device */, const cl_device_partition_property * /* properties */,
-  cl_uint /* num_devices */, cl_device_id * /* out_devices */,
-  cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clRetainDevice)(cl_device_id /* device */)
-  CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clReleaseDevice)(cl_device_id /* device */)
-  CL_API_SUFFIX__VERSION_1_2;
-typedef cl_context(CL_API_CALL *PFN_clCreateContext)(
-  const cl_context_properties * /* properties */, cl_uint /* num_devices */,
-  const cl_device_id * /* devices */,
-  void(CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
-  void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_context(CL_API_CALL *PFN_clCreateContextFromType)(
-  const cl_context_properties * /* properties */, cl_device_type /* device_type */,
-  void(CL_CALLBACK * /* pfn_notify*/)(const char *, const void *, size_t, void *),
-  void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainContext)(cl_context /* context */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseContext)(cl_context /* context */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetContextInfo)(
-  cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueueWithProperties)(
-  cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainCommandQueue)(cl_command_queue /* command_queue */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseCommandQueue)(cl_command_queue /* command_queue */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetCommandQueueInfo)(
-  cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */,
-  size_t /* param_value_size */, void * /* param_value */,
-  size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_mem(CL_API_CALL *PFN_clCreateBuffer)(
-  cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_mem(CL_API_CALL *PFN_clCreateSubBuffer)(
-  cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */,
-  const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_mem(CL_API_CALL *PFN_clCreateImage)(
-  cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */,
-  const cl_image_desc * /* image_desc */, void * /* host_ptr */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_mem(CL_API_CALL *PFN_clCreatePipe)(
-  cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */,
-  cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetSupportedImageFormats)(
-  cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */,
-  cl_uint /* num_entries */, cl_image_format * /* image_formats */,
-  cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetMemObjectInfo)(
-  cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetImageInfo)(
-  cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetPipeInfo)(
-  cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clSetMemObjectDestructorCallback)(
-  cl_mem /* memobj */,
-  void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/),
-  void * /*user_data */) CL_API_SUFFIX__VERSION_1_1;
-typedef void *(CL_API_CALL *PFN_clSVMAlloc)(cl_context /* context */, cl_svm_mem_flags /* flags */,
-                                            size_t /* size */,
-                                            cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0;
-typedef void(CL_API_CALL *PFN_clSVMFree)(cl_context /* context */,
-                                         void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_sampler(CL_API_CALL *PFN_clCreateSamplerWithProperties)(
-  cl_context /* context */, const cl_sampler_properties * /* normalized_coords */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainSampler)(cl_sampler /* sampler */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseSampler)(cl_sampler /* sampler */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetSamplerInfo)(
-  cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithSource)(
-  cl_context /* context */, cl_uint /* count */, const char ** /* strings */,
-  const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBinary)(
-  cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
-  const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBuiltInKernels)(
-  cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
-  const char * /* kernel_names */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clRetainProgram)(cl_program /* program */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseProgram)(cl_program /* program */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clBuildProgram)(
-  cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
-  const char * /* options */,
-  void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
-  void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clCompileProgram)(
-  cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
-  const char * /* options */, cl_uint /* num_input_headers */,
-  const cl_program * /* input_headers */, const char ** /* header_include_names */,
-  void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
-  void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_program(CL_API_CALL *PFN_clLinkProgram)(
-  cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */,
-  const char * /* options */, cl_uint /* num_input_programs */,
-  const cl_program * /* input_programs */,
-  void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
-  void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clUnloadPlatformCompiler)(cl_platform_id /* platform */)
-  CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clGetProgramInfo)(
-  cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetProgramBuildInfo)(
-  cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */,
-  size_t /* param_value_size */, void * /* param_value */,
-  size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_kernel(CL_API_CALL *PFN_clCreateKernel)(
-  cl_program /* program */, const char * /* kernel_name */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clCreateKernelsInProgram)(
-  cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */,
-  cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clRetainKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clSetKernelArg)(cl_kernel /* kernel */, cl_uint /* arg_index */,
-                                                size_t /* arg_size */, const void * /* arg_value */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clSetKernelArgSVMPointer)(
-  cl_kernel /* kernel */, cl_uint /* arg_index */,
-  const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clSetKernelExecInfo)(
-  cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */,
-  const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clGetKernelInfo)(
-  cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetKernelArgInfo)(
-  cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */,
-  size_t /* param_value_size */, void * /* param_value */,
-  size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clGetKernelWorkGroupInfo)(
-  cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */,
-  size_t /* param_value_size */, void * /* param_value */,
-  size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clWaitForEvents)(
-  cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clGetEventInfo)(
-  cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_event(CL_API_CALL *PFN_clCreateUserEvent)(
-  cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clRetainEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clReleaseEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clSetUserEventStatus)(
-  cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clSetEventCallback)(
-  cl_event /* event */, cl_int /* command_exec_callback_type */,
-  void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
-  void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clGetEventProfilingInfo)(
-  cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */,
-  void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clFlush)(cl_command_queue /* command_queue */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clFinish)(cl_command_queue /* command_queue */)
-  CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBuffer)(
-  cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */,
-  size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBufferRect)(
-  cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */,
-  const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */,
-  size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */,
-  size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBuffer)(
-  cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */,
-  size_t /* offset */, size_t /* size */, const void * /* ptr */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBufferRect)(
-  cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */,
-  const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */,
-  size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */,
-  size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueFillBuffer)(
-  cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */,
-  size_t /* pattern_size */, size_t /* offset */, size_t /* size */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBuffer)(
-  cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */,
-  size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferRect)(
-  cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */,
-  const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */,
-  size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */,
-  size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReadImage)(
-  cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */,
-  const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */,
-  size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteImage)(
-  cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */,
-  const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */,
-  size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueFillImage)(
-  cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */,
-  const size_t * /* origin[3] */, const size_t * /* region[3] */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImage)(
-  cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */,
-  const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */,
-  const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImageToBuffer)(
-  cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */,
-  const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferToImage)(
-  cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */,
-  size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef void *(CL_API_CALL *PFN_clEnqueueMapBuffer)(
-  cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */,
-  cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0;
-typedef void *(CL_API_CALL *PFN_clEnqueueMapImage)(
-  cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */,
-  cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */,
-  size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueUnmapMemObject)(
-  cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueMigrateMemObjects)(
-  cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */,
-  const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueNDRangeKernel)(
-  cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */,
-  const size_t * /* global_work_offset */, const size_t * /* global_work_size */,
-  const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueNativeKernel)(
-  cl_command_queue /* command_queue */, void(CL_CALLBACK * /*user_func*/)(void *),
-  void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */,
-  const cl_mem * /* mem_list */, const void ** /* args_mem_loc */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueMarkerWithWaitList)(
-  cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrierWithWaitList)(
-  cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMFree)(
-  cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */,
-  void *[] /* svm_pointers[] */,
-  void(CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */,
-                                        cl_uint /* num_svm_pointers */,
-                                        void *[] /* svm_pointers[] */, void * /* user_data */),
-  void * /* user_data */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemcpy)(
-  cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */,
-  const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemFill)(
-  cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */,
-  size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMap)(
-  cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */,
-  void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMUnmap)(
-  cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */,
-  const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
-typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddressForPlatform)(
-  cl_platform_id /* platform */, const char * /* func_name */)CL_API_SUFFIX__VERSION_1_2;
-typedef cl_mem(CL_API_CALL *PFN_clCreateImage2D)(cl_context /* context */, cl_mem_flags /* flags */,
-                                                 const cl_image_format * /* image_format */,
-                                                 size_t /* image_width */,
-                                                 size_t /* image_height */,
-                                                 size_t /* image_row_pitch */,
-                                                 void * /* host_ptr */, cl_int * /* errcode_ret */);
-typedef cl_mem(CL_API_CALL *PFN_clCreateImage3D)(
-  cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */,
-  size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */,
-  size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */,
-  cl_int * /* errcode_ret */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueMarker)(cl_command_queue /* command_queue */,
-                                                 cl_event * /* event */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueWaitForEvents)(cl_command_queue /* command_queue */,
-                                                        cl_uint /* num_events */,
-                                                        const cl_event * /* event_list */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrier)(cl_command_queue /* command_queue */);
-typedef cl_int(CL_API_CALL *PFN_clUnloadCompiler)();
-typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddress)(const char * /* func_name */);
-typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueue)(
-  cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */,
-  cl_int * /* errcode_ret */);
-typedef cl_sampler(CL_API_CALL *PFN_clCreateSampler)(cl_context /* context */,
-                                                     cl_bool /* normalized_coords */,
-                                                     cl_addressing_mode /* addressing_mode */,
-                                                     cl_filter_mode /* filter_mode */,
-                                                     cl_int * /* errcode_ret */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueTask)(cl_command_queue /* command_queue */,
-                                               cl_kernel /* kernel */,
-                                               cl_uint /* num_events_in_wait_list */,
-                                               const cl_event * /* event_wait_list */,
-                                               cl_event * /* event */);
-
-// OpenGL sharing
-typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int *);
-typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLTexture)(
-  cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */,
-  cl_GLint /* miplevel */, cl_GLuint /* texture */,
-  cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireGLObjects)(cl_command_queue /* command_queue */,
-                                                           cl_uint /* num_objects */,
-                                                           const cl_mem * /* mem_objects */,
-                                                           cl_uint /* num_events_in_wait_list */,
-                                                           const cl_event * /* event_wait_list */,
-                                                           cl_event * /* event */);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseGLObjects)(
-  cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */,
-  cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */,
-  cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
-
-// cl_khr_egl_event extension
-
-// CLeglDisplayKHR is an opaque handle to an EGLDisplay
-typedef void *CLeglDisplayKHR;
-
-// CLeglSyncKHR is an opaque handle to an EGLSync object
-typedef void *CLeglSyncKHR;
-
-typedef cl_event(CL_API_CALL *PFN_clCreateEventFromEGLSyncKHR)(cl_context /* context */,
-                                                               CLeglSyncKHR /* sync */,
-                                                               CLeglDisplayKHR /* display */,
-                                                               cl_int * /* errcode_ret */);
-
-// EGL sharing
-typedef cl_mem(CL_API_CALL *PFN_clCreateFromEGLImageKHR)(
-  cl_context /*context*/, CLeglDisplayKHR /*display*/, CLeglImageKHR /*image*/,
-  cl_mem_flags /*flags*/, const cl_egl_image_properties_khr * /*properties*/,
-  cl_int * /*errcode_ret*/);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireEGLObjectsKHR)(
-  cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/,
-  cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/);
-typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseEGLObjectsKHR)(
-  cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/,
-  cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/);
-
-extern PFN_clGetPlatformIDs clGetPlatformIDs;
-extern PFN_clGetPlatformInfo clGetPlatformInfo;
-extern PFN_clGetDeviceIDs clGetDeviceIDs;
-extern PFN_clGetDeviceInfo clGetDeviceInfo;
-extern PFN_clCreateSubDevices clCreateSubDevices;
-extern PFN_clRetainDevice clRetainDevice;
-extern PFN_clReleaseDevice clReleaseDevice;
-extern PFN_clCreateContext clCreateContext;
-extern PFN_clCreateContextFromType clCreateContextFromType;
-extern PFN_clRetainContext clRetainContext;
-extern PFN_clReleaseContext clReleaseContext;
-extern PFN_clGetContextInfo clGetContextInfo;
-extern PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties;
-extern PFN_clRetainCommandQueue clRetainCommandQueue;
-extern PFN_clReleaseCommandQueue clReleaseCommandQueue;
-extern PFN_clGetCommandQueueInfo clGetCommandQueueInfo;
-extern PFN_clCreateBuffer clCreateBuffer;
-extern PFN_clCreateSubBuffer clCreateSubBuffer;
-extern PFN_clCreateImage clCreateImage;
-extern PFN_clCreatePipe clCreatePipe;
-extern PFN_clRetainMemObject clRetainMemObject;
-extern PFN_clReleaseMemObject clReleaseMemObject;
-extern PFN_clGetSupportedImageFormats clGetSupportedImageFormats;
-extern PFN_clGetMemObjectInfo clGetMemObjectInfo;
-extern PFN_clGetImageInfo clGetImageInfo;
-extern PFN_clGetPipeInfo clGetPipeInfo;
-extern PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback;
-extern PFN_clSVMAlloc clSVMAlloc;
-extern PFN_clSVMFree clSVMFree;
-extern PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties;
-extern PFN_clRetainSampler clRetainSampler;
-extern PFN_clReleaseSampler clReleaseSampler;
-extern PFN_clGetSamplerInfo clGetSamplerInfo;
-extern PFN_clCreateProgramWithSource clCreateProgramWithSource;
-extern PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
-extern PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels;
-extern PFN_clRetainProgram clRetainProgram;
-extern PFN_clReleaseProgram clReleaseProgram;
-extern PFN_clBuildProgram clBuildProgram;
-extern PFN_clCompileProgram clCompileProgram;
-extern PFN_clLinkProgram clLinkProgram;
-extern PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler;
-extern PFN_clGetProgramInfo clGetProgramInfo;
-extern PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
-extern PFN_clCreateKernel clCreateKernel;
-extern PFN_clCreateKernelsInProgram clCreateKernelsInProgram;
-extern PFN_clRetainKernel clRetainKernel;
-extern PFN_clReleaseKernel clReleaseKernel;
-extern PFN_clSetKernelArg clSetKernelArg;
-extern PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer;
-extern PFN_clSetKernelExecInfo clSetKernelExecInfo;
-extern PFN_clGetKernelInfo clGetKernelInfo;
-extern PFN_clGetKernelArgInfo clGetKernelArgInfo;
-extern PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo;
-extern PFN_clWaitForEvents clWaitForEvents;
-extern PFN_clGetEventInfo clGetEventInfo;
-extern PFN_clCreateUserEvent clCreateUserEvent;
-extern PFN_clRetainEvent clRetainEvent;
-extern PFN_clReleaseEvent clReleaseEvent;
-extern PFN_clSetUserEventStatus clSetUserEventStatus;
-extern PFN_clSetEventCallback clSetEventCallback;
-extern PFN_clGetEventProfilingInfo clGetEventProfilingInfo;
-extern PFN_clFlush clFlush;
-extern PFN_clFinish clFinish;
-extern PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
-extern PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect;
-extern PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
-extern PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect;
-extern PFN_clEnqueueFillBuffer clEnqueueFillBuffer;
-extern PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer;
-extern PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect;
-extern PFN_clEnqueueReadImage clEnqueueReadImage;
-extern PFN_clEnqueueWriteImage clEnqueueWriteImage;
-extern PFN_clEnqueueFillImage clEnqueueFillImage;
-extern PFN_clEnqueueCopyImage clEnqueueCopyImage;
-extern PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer;
-extern PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage;
-extern PFN_clEnqueueMapBuffer clEnqueueMapBuffer;
-extern PFN_clEnqueueMapImage clEnqueueMapImage;
-extern PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject;
-extern PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects;
-extern PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel;
-extern PFN_clEnqueueNativeKernel clEnqueueNativeKernel;
-extern PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList;
-extern PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList;
-extern PFN_clEnqueueSVMFree clEnqueueSVMFree;
-extern PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy;
-extern PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill;
-extern PFN_clEnqueueSVMMap clEnqueueSVMMap;
-extern PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap;
-extern PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform;
-extern PFN_clCreateImage2D clCreateImage2D;
-extern PFN_clCreateImage3D clCreateImage3D;
-extern PFN_clEnqueueMarker clEnqueueMarker;
-extern PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents;
-extern PFN_clEnqueueBarrier clEnqueueBarrier;
-extern PFN_clUnloadCompiler clUnloadCompiler;
-extern PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress;
-extern PFN_clCreateCommandQueue clCreateCommandQueue;
-extern PFN_clCreateSampler clCreateSampler;
-extern PFN_clEnqueueTask clEnqueueTask;
-
-// OpenGL sharing
-extern PFN_clCreateFromGLBuffer clCreateFromGLBuffer;
-extern PFN_clCreateFromGLTexture clCreateFromGLTexture;
-extern PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects;
-extern PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects;
-
-// cl_khr_egl_event extension
-extern PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR;
-
-// EGL sharing
-extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
-extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
-extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
-
-// For convenient image creation
-// It uses clCreateImage if it available (clCreateImage available since cl 1.2)
-// otherwise it will use legacy clCreateImage2D
-cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
-                           const cl_image_format *image_format, const cl_image_desc *image_desc,
-                           void *host_ptr, cl_int *errcode_ret);
-
-// It uses clCreateImage if it available (clCreateImage available since cl 1.2)
-// otherwise it will use legacy clCreateImage3D
-cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags,
-                           const cl_image_format *image_format, const cl_image_desc *image_desc,
-                           void *host_ptr, cl_int *errcode_ret);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc b/runtime/onert/backend/gpu_cl/open_cl/Operations.cc
deleted file mode 100644 (file)
index 2608b53..0000000
+++ /dev/null
@@ -1,704 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Operations.h"
-#include "open_cl/Operations.h"
-
-#include <algorithm>
-#include <cstdint>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-#include <unordered_map>
-
-#include "absl/container/flat_hash_map.h"
-
-#include "Shape.h"
-#include "Status.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-Padding2D &Padding2D::operator=(const Padding2D &value)
-{
-  prepended = value.prepended;
-  appended = value.appended;
-  return *this;
-}
-
-bool Padding2D::operator==(const Padding2D &value)
-{
-  return this->prepended == value.prepended && this->appended == value.appended;
-}
-
-bool Padding2D::operator!=(const Padding2D &value) { return !(*this == value); }
-
-Padding2D &Padding2D::operator-(const Padding2D &value)
-{
-  prepended.h -= value.prepended.h;
-  prepended.w -= value.prepended.w;
-  appended.h -= value.appended.h;
-  appended.w -= value.appended.w;
-  return *this;
-}
-
-Padding3D &Padding3D::operator=(const Padding3D &value)
-{
-  prepended = value.prepended;
-  appended = value.appended;
-  return *this;
-}
-
-bool Padding3D::operator==(const Padding3D &value)
-{
-  return this->prepended == value.prepended && this->appended == value.appended;
-}
-
-bool Padding3D::operator!=(const Padding3D &value) { return !(*this == value); }
-
-Padding3D &Padding3D::operator-(const Padding3D &value)
-{
-  prepended.h -= value.prepended.h;
-  prepended.w -= value.prepended.w;
-  prepended.d -= value.prepended.d;
-  appended.h -= value.appended.h;
-  appended.w -= value.appended.w;
-  appended.d -= value.appended.d;
-  return *this;
-}
-
-std::string ToString(enum OperationType op)
-{
-  switch (op)
-  {
-    // case OperationType::ABS:
-    //   return "abs";
-    case OperationType::ADD:
-      return "add";
-    // case OperationType::CONCAT:
-    //   return "concat";
-    // case OperationType::COS:
-    //   return "cos";
-    // case OperationType::EXP:
-    //   return "exp";
-    // case OperationType::LOG:
-    //   return "log";
-    // case OperationType::NEG:
-    //   return "neg";
-    // case OperationType::POOLING_2D:
-    //   return "pooling_2d";
-    // case OperationType::REDUCE_MAXIMUM:
-    //   return "reduce_maximum";
-    // case OperationType::REDUCE_MINIMUM:
-    //   return "reduce_minimum";
-    // case OperationType::REDUCE_PRODUCT:
-    //   return "reduce_product";
-    // case OperationType::REDUCE_SUM:
-    //   return "reduce_sum";
-    // case OperationType::RESIZE:
-    //   return "resize";
-    // case OperationType::RELU:
-    //   return "relu";
-    // case OperationType::RSQRT:
-    //   return "rsqrt";
-    // case OperationType::SQRT:
-    //   return "sqrt";
-    // case OperationType::SQUARE:
-    //   return "square";
-    case OperationType::UNKNOWN:
-      return "unknown_operation";
-  }
-  return "";
-}
-
-OperationType OperationTypeFromString(const std::string &name)
-{
-  static const auto operations = new std::unordered_map<std::string, OperationType>({
-    // {"abs", OperationType::ABS},
-    {"add", OperationType::ADD},
-    // {"concat", OperationType::CONCAT},
-    // {"cos", OperationType::COS},
-    // {"exp", OperationType::EXP},
-    // {"log", OperationType::LOG},
-    // {"neg", OperationType::NEG},
-    // {"pooling_2d", OperationType::POOLING_2D},
-    // {"reduce_maximum", OperationType::REDUCE_MAXIMUM},
-    // {"reduce_minimum", OperationType::REDUCE_MINIMUM},
-    // {"reduce_product", OperationType::REDUCE_PRODUCT},
-    // {"reduce_sum", OperationType::REDUCE_SUM},
-    // {"relu", OperationType::RELU},
-    // {"resize", OperationType::RESIZE},
-    // {"rsqrt", OperationType::RSQRT},
-    // {"sqrt", OperationType::SQRT},
-    // {"square", OperationType::SQUARE},
-  });
-  auto op = operations->find(name);
-  return op == operations->end() ? OperationType::UNKNOWN : op->second;
-}
-
-namespace
-{
-
-template <typename T> T DivideRoundUp(T n, T divisor) { return (n - 1) / divisor + 1; }
-
-int32_t CalculateOutputSizeBeforeStrides(int32_t input, int32_t kernel, int32_t padding,
-                                         int32_t dilation)
-{
-  const int32_t dilated_kernel = (kernel - 1) * dilation + 1;
-  return input + padding - dilated_kernel + 1;
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWC &input, const Convolution2DAttributes &attr)
-{
-  return CalculateOutputSizeBeforeStrides(
-    input.get<T>(), attr.weights.shape.get<T>(),
-    attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>());
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Convolution3DAttributes &attr)
-{
-  return CalculateOutputSizeBeforeStrides(
-    input.get<T>(), attr.weights.shape.get<T>(),
-    attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>());
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWC &input, const Pooling2DAttributes &attr)
-{
-  return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(),
-                                          attr.padding.prepended.get<T>() +
-                                            attr.padding.appended.get<T>(),
-                                          /*dilation=*/1);
-}
-
-template <Axis T>
-int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Pooling3DAttributes &attr)
-{
-  return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(),
-                                          attr.padding.prepended.get<T>() +
-                                            attr.padding.appended.get<T>(),
-                                          /*dilation=*/1);
-}
-
-template <Axis T>
-int32_t CalculateOutput(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
-  return (input.get<T>() - 1) * attr.stride.get<T>() -
-         (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) +
-         attr.weights.shape.get<T>() + attr.adjacent.get<T>();
-}
-
-template <Axis T>
-int32_t CalculateOutput(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
-  return (input.get<T>() - 1) * attr.stride.get<T>() -
-         (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) +
-         attr.weights.shape.get<T>();
-}
-
-inline int32_t StridedSize(int32_t size, int32_t stride)
-{
-  return stride == 0 ? -1 : DivideRoundUp(size, stride);
-}
-
-template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWC &input, const AttrT &attr)
-{
-  return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr),
-                     attr.strides.template get<AxisT>());
-}
-
-template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWDC &input, const AttrT &attr)
-{
-  return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr),
-                     attr.strides.template get<AxisT>());
-}
-
-int32_t CalculateSamePadding(int32_t input, int32_t kernel, int32_t dilation, int32_t stride)
-{
-  const int32_t dilated_kernel = (kernel - 1) * dilation + 1;
-  return std::max(0, dilated_kernel - (input - 1) % stride - 1);
-}
-
-// Returns a padding that should be present to make sure image size stays
-// the same.
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
-                              attr.dilations.get<AxisT>(), attr.strides.get<AxisT>());
-}
-
-// Returns a padding that should be present to make sure image size stays
-// the same.
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
-                              attr.dilations.get<AxisT>(), attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
-                              /*dilation=*/1, attr.stride.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(),
-                              /*dilation=*/1, attr.stride.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
-                              /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
-                              /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
-                              /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-template <Axis AxisT>
-int32_t CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr)
-{
-  return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(),
-                              /*dilation=*/1, attr.strides.get<AxisT>());
-}
-
-Padding2D MakeSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
-  int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
-  int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
-  Padding2D padding;
-  padding.prepended = HW(padding_height / 2, padding_width / 2);
-  padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2);
-  return padding;
-}
-
-Padding3D MakeSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
-  int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
-  int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
-  int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr);
-  Padding3D padding;
-  padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2);
-  padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2,
-                         padding_depth - padding_depth / 2);
-  return padding;
-}
-
-// If padding depends on input, convert it into fixed padding.
-template <class AttrT> Padding2D MakeSamePadding(const BHWC &input, const AttrT &attr)
-{
-  int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
-  int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
-  Padding2D padding;
-  padding.prepended = HW(padding_height / 2, padding_width / 2);
-  padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2);
-  return padding;
-}
-
-// If padding depends on input, convert it into fixed padding.
-template <class AttrT> Padding3D MakeSamePadding(const BHWDC &input, const AttrT &attr)
-{
-  int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr);
-  int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr);
-  int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr);
-  Padding3D padding;
-  padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2);
-  padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2,
-                         padding_depth - padding_depth / 2);
-  return padding;
-}
-
-} // namespace
-
-BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr)
-{
-  return BHWC(
-    input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h,
-    input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w, input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr)
-{
-  return BHWDC(
-    input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h,
-    input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w,
-    input.d * attr.strides.d - attr.padding.prepended.d - attr.padding.appended.d, input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr)
-{
-  return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-              CalculateOutput<Axis::WIDTH>(input, attr), input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr)
-{
-  return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-               CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
-               input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr)
-{
-  return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-              CalculateOutput<Axis::WIDTH>(input, attr),
-              attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr)
-{
-  return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-               CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
-               attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
-  return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-              CalculateOutput<Axis::WIDTH>(input, attr),
-              attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
-  return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-               CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
-               attr.weights.shape.get<Axis::OUTPUT_CHANNELS>());
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr)
-{
-  return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-              CalculateOutput<Axis::WIDTH>(input, attr),
-              attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() *
-                attr.weights.shape.get<Axis::INPUT_CHANNELS>());
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr)
-{
-  return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr),
-               CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr),
-               attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() *
-                 attr.weights.shape.get<Axis::INPUT_CHANNELS>());
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr)
-{
-  (void)input;
-  return BHWC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b),
-              StridedSize(attr.ends.h - attr.starts.h, attr.strides.h),
-              StridedSize(attr.ends.w - attr.starts.w, attr.strides.w),
-              StridedSize(attr.ends.c - attr.starts.c, attr.strides.c));
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr)
-{
-  (void)input;
-  return BHWDC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b),
-               StridedSize(attr.ends.h - attr.starts.h, attr.strides.h),
-               StridedSize(attr.ends.w - attr.starts.w, attr.strides.w),
-               StridedSize(attr.ends.d - attr.starts.d, attr.strides.d),
-               StridedSize(attr.ends.c - attr.starts.c, attr.strides.c));
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr)
-{
-  return BHWC(
-    attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h,
-    attr.appended.w + attr.prepended.w + input.w, attr.appended.c + attr.prepended.c + input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr)
-{
-  return BHWDC(
-    attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h,
-    attr.appended.w + attr.prepended.w + input.w, attr.appended.d + attr.prepended.d + input.d,
-    attr.appended.c + attr.prepended.c + input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr)
-{
-  return BHWC(input.b, 1, 1, attr.weights.shape.o);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr)
-{
-  const int b = attr.dims.find(Axis::BATCH) == attr.dims.end() ? input.b : 1;
-  const int h = attr.dims.find(Axis::HEIGHT) == attr.dims.end() ? input.h : 1;
-  const int w = attr.dims.find(Axis::WIDTH) == attr.dims.end() ? input.w : 1;
-  const int c = attr.dims.find(Axis::CHANNELS) == attr.dims.end() ? input.c : 1;
-  return BHWC(b, h, w, c);
-}
-
-absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr,
-                                  BHWC *output_shape)
-{
-  BHWC new_shape = input[0];
-  switch (attr.axis)
-  {
-    case Axis::CHANNELS:
-      for (size_t i = 1; i < input.size(); i++)
-      {
-        if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError(
-            "Height, Width and Batch must be the same when concatenating "
-            "by channels axis");
-        }
-        new_shape.c += input[i].c;
-      }
-      break;
-    case Axis::HEIGHT:
-      for (size_t i = 1; i < input.size(); i++)
-      {
-        if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError(
-            "Channels, Width and Batch must be the same when concatenating "
-            "by height axis");
-        }
-        new_shape.h += input[i].h;
-      }
-      break;
-    case Axis::WIDTH:
-      for (size_t i = 1; i < input.size(); i++)
-      {
-        if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError(
-            "Height, Channels and Batch must be the same when concatenating "
-            "by width axis");
-        }
-        new_shape.w += input[i].w;
-      }
-      break;
-    case Axis::BATCH:
-      for (size_t i = 1; i < input.size(); i++)
-      {
-        if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].w != new_shape.w)
-        {
-          return absl::InvalidArgumentError(
-            "Width, Height and Channels must be the same when concatenating "
-            "by batch axis");
-        }
-        new_shape.b += input[i].b;
-      }
-      break;
-    default:
-      return absl::InvalidArgumentError("Invalid axis");
-      break;
-  }
-  *output_shape = new_shape;
-  return absl::OkStatus();
-}
-
-absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr,
-                                  BHWDC *output_shape)
-{
-  BHWDC new_shape = input[0];
-  switch (attr.axis)
-  {
-    case Axis::CHANNELS:
-      for (size_t i = 1; i < input.size(); ++i)
-      {
-        if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].d != new_shape.d ||
-            input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError("Height, Width, Batch and Depth must be the same when "
-                                            "concatenating "
-                                            "by channels axis");
-        }
-        new_shape.c += input[i].c;
-      }
-      break;
-    case Axis::HEIGHT:
-      for (size_t i = 1; i < input.size(); ++i)
-      {
-        if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].d != new_shape.d ||
-            input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError(
-            "Width, Depth, Batch and Channels must be the same when "
-            "concatenating "
-            "by height axis");
-        }
-        new_shape.h += input[i].h;
-      }
-      break;
-    case Axis::WIDTH:
-      for (size_t i = 1; i < input.size(); ++i)
-      {
-        if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].d != new_shape.d ||
-            input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError(
-            "Height, Depth, Batch and Channels must be the same when "
-            "concatenating "
-            "by width axis");
-        }
-        new_shape.w += input[i].w;
-      }
-      break;
-    case Axis::DEPTH:
-      for (size_t i = 1; i < input.size(); ++i)
-      {
-        if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c ||
-            input[i].b != new_shape.b)
-        {
-          return absl::InvalidArgumentError(
-            "Width, Height, Batch and Channels must be the same when "
-            "concatenating "
-            "by depth axis");
-        }
-        new_shape.d += input[i].d;
-      }
-      break;
-    case Axis::BATCH:
-      for (size_t i = 1; i < input.size(); ++i)
-      {
-        if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c ||
-            input[i].d != new_shape.d)
-        {
-          return absl::InvalidArgumentError(
-            "Width, Height, Depth and Channels must be the same when "
-            "concatenating "
-            "by batch axis");
-        }
-        new_shape.b += input[i].b;
-      }
-      break;
-    default:
-      return absl::InvalidArgumentError("Invalid axis");
-  }
-  *output_shape = new_shape;
-  return absl::OkStatus();
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr)
-{
-  return MakeSamePadding(input, attr);
-}
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr)
-{
-  return attr.align_corners && input_size > 1 && output_size > 1
-           ? static_cast<float>(input_size - 1) / (output_size - 1)
-           : static_cast<float>(input_size) / output_size;
-}
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr)
-{
-  return attr.align_corners && input_size > 1 && output_size > 1
-           ? static_cast<float>(input_size - 1) / (output_size - 1)
-           : static_cast<float>(input_size) / output_size;
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr)
-{
-  return BHWC(input.b, attr.new_shape.h, attr.new_shape.w, input.c);
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr)
-{
-  return BHWDC(input.b, attr.new_shape.h, attr.new_shape.w, attr.new_shape.d, input.c);
-}
-
-BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr)
-{
-  return BHWC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w),
-              input.get(attr.perm.c));
-}
-
-BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr)
-{
-  return BHWDC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w),
-               input.get(attr.perm.d), input.get(attr.perm.c));
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.h b/runtime/onert/backend/gpu_cl/open_cl/Operations.h
deleted file mode 100644 (file)
index 825eb90..0000000
+++ /dev/null
@@ -1,586 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__
-
-#include <cstdint>
-#include <set>
-#include <string>
-#include <vector>
-
-#include "absl/types/variant.h"
-
-#include "DataType.h"
-#include "Shape.h"
-#include "Status.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class OperationType
-{
-  UNKNOWN = 0,
-  // ABS,
-  ADD,
-  // BATCH_TO_SPACE,
-  // BATCH_NORMALIZATION,
-  // BATCHED_MATMUL,
-  // CONCAT,
-  // CONST,
-  // CONVOLUTION_2D,
-  // CONVOLUTION_TRANSPOSED,
-  // COPY,
-  // COS,
-  // DEPTHWISE_CONVOLUTION,
-  // DIV,
-  // ELU,
-  // EQUAL,
-  // EXP,
-  // FULLY_CONNECTED,
-  // GREATER,
-  // GREATER_EQUAL,
-  // HARD_SWISH,
-  // LESS,
-  // LESS_EQUAL,
-  // LOG,
-  // LSTM,
-  // MAXIMUM,
-  // MAX_UNPOOLING_2D,
-  // MEAN,
-  // MEAN_STDDEV_NORMALIZATION,
-  // MINIMUM,
-  // MUL,
-  // NEG,
-  // NOT_EQUAL,
-  // PAD,
-  // POOLING_2D,
-  // POW,
-  // PRELU,
-  // Used to accurately run inference on quantized models.
-  // QUANTIZE_AND_DEQUANTIZE,
-  // REDUCE_MAXIMUM,
-  // REDUCE_MINIMUM,
-  // REDUCE_PRODUCT,
-  // REDUCE_SUM,
-  // RELU,
-  // RESHAPE,
-  // RESIZE,
-  // RSQRT,
-  // SIGMOID,
-  // SIN,
-  // SLICE,
-  // SOFTMAX,
-  // SPACE_TO_BATCH,
-  // SPACE_TO_DEPTH,
-  // SQRT,
-  // SQUARE,
-  // SQUARED_DIFF,
-  // SUB,
-  // TANH,
-  // TRANSPOSE,
-};
-
-std::string ToString(enum OperationType op);
-
-OperationType OperationTypeFromString(const std::string &name);
-
-typedef absl::variant<absl::monostate, InternalTensor<HWC, DataType::FLOAT32>,
-                      InternalTensor<Linear, DataType::FLOAT32>, float>
-  TensorOrScalar;
-
-struct Padding2D
-{
-  Padding2D() = default;
-  Padding2D(const Padding2D &);
-  Padding2D &operator=(const Padding2D &value);
-  bool operator==(const Padding2D &value);
-  bool operator!=(const Padding2D &value);
-  Padding2D &operator-(const Padding2D &value);
-
-  // Padding values for every axis (if needed), where 'prepended' defines
-  // padding for the beginning of each axis and 'appended' represents end part
-  // of the corresponding axis.
-  HW prepended = HW(-1, -1);
-  HW appended = HW(-1, -1);
-};
-
-struct Padding3D
-{
-  Padding3D() = default;
-  Padding3D(const Padding3D &);
-  Padding3D &operator=(const Padding3D &value);
-  bool operator==(const Padding3D &value);
-  bool operator!=(const Padding3D &value);
-  Padding3D &operator-(const Padding3D &value);
-  // Padding values for every axis (if needed), where 'prepended' defines
-  // padding for the beginning of each axis and 'appended' represents end part
-  // of the corresponding axis.
-  HWD prepended = HWD(0, 0, 0);
-  HWD appended = HWD(0, 0, 0);
-};
-
-struct Crop2D : public Padding2D
-{
-};
-
-struct SpaceToBatchAttributes
-{
-  HW block;
-  Padding2D padding;
-};
-
-struct BatchToSpaceAttributes
-{
-  HW block;
-  Crop2D crop;
-};
-
-enum class PoolingType
-{
-  UNDEFINED = 0,
-
-  // average pooling
-  AVERAGE = 1,
-
-  // max pooling
-  MAX = 2,
-};
-
-struct Pooling2DAttributes
-{
-  PoolingType type = PoolingType::UNDEFINED;
-  // Strides for every axis.
-  HW strides = HW(-1, -1);
-  HW kernel = HW(-1, -1);
-  Padding2D padding;
-  // NOTE(akulik): technically the number of outputs from Pooling node indicates
-  // whether indices are needed or not, but I decided to keep it inside
-  // attributes to simplify processing.
-  bool output_indices = false;
-};
-
-struct Pooling3DAttributes
-{
-  PoolingType type = PoolingType::UNDEFINED;
-  // Strides for every axis.
-  HWD strides = HWD(0, 0, 0);
-  HWD kernel = HWD(0, 0, 0);
-  Padding3D padding;
-  // NOTE(akulik): technically the number of outputs from Pooling node indicates
-  // whether indices are needed or not, but I decided to keep it inside
-  // attributes to simplify processing.
-  bool output_indices = false;
-};
-
-struct MaxUnpooling2DAttributes
-{
-  // Strides for every axis.
-  HW strides = HW(-1, -1);
-  HW kernel = HW(-1, -1);
-  Padding2D padding;
-};
-
-struct MaxUnpooling3DAttributes
-{
-  // Strides for every axis.
-  HWD strides = HWD(0, 0, 0);
-  HWD kernel = HWD(0, 0, 0);
-  Padding3D padding;
-};
-
-struct MeanAttributes
-{
-  // The vector of dimensions to calculate mean along.
-  std::set<Axis> dims;
-};
-
-struct ConcatAttributes
-{
-  // Defines axis by which to concat on.
-  Axis axis = Axis::UNKNOWN;
-};
-
-// @return shape of a tensor after MaxUnpooling2D operation is applied to
-//         the given input.
-BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr);
-
-// @return shape of a tensor after MaxUnpooling3D operation is applied to
-//         the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr);
-
-// @return shape of a tensor after Pooling2D operation is applied to the given
-//         input.
-BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr);
-
-// @return shape of a tensor after Pooling3D operation is applied to the given
-//         input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr);
-
-// @return shape of a tensor after Concat operation is applied to the given
-//         input.
-absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr,
-                                  BHWC *output_shape);
-
-// @return shape of a tensor after Concat operation is applied to the given
-//         input.
-absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr,
-                                  BHWDC *output_shape);
-
-// @return padding for pooling operation to make sure output keep the same shape
-// as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr);
-
-// @return padding for pooling operation to make sure output keep the same shape
-// as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr);
-
-// @return padding for max unpooling operation to make sure output keep the same
-// shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr);
-
-// @return padding for max unpooling operation to make sure output keep the same
-// shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr);
-
-struct Convolution2DAttributes
-{
-  HW strides = HW(1, 1);   // Along each axis.
-  HW dilations = HW(1, 1); // Along each axis.
-  Padding2D padding;
-
-  InternalTensor<OHWI, DataType::FLOAT32> weights;
-  InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-struct Convolution3DAttributes
-{
-  HWD strides = HWD(0, 0, 0);   // Along each axis.
-  HWD dilations = HWD(0, 0, 0); // Along each axis.
-  Padding3D padding;
-
-  InternalTensor<OHWDI, DataType::FLOAT32> weights;
-  InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-// @return shape of a tensor after Convolution2D operation is applied to
-//         the given input.
-BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr);
-
-// @return shape of a tensor after Convolution3D operation is applied to
-//         the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr);
-
-// @return padding for convolution operation to make sure output keep the same
-// shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr);
-
-// @return padding for convolution operation to make sure output keep the same
-// shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr);
-
-struct ConvolutionTransposedAttributes
-{
-  HW stride = HW(1, 1); // Along each axis.
-  HW adjacent;          // TODO(sorokin): No op on Flow.
-  Padding2D padding;
-
-  InternalTensor<OHWI, DataType::FLOAT32> weights;
-  InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-struct ConvolutionTransposed3DAttributes
-{
-  HWD stride = HWD(0, 0, 0); // Along each axis.
-  Padding3D padding;
-
-  InternalTensor<OHWDI, DataType::FLOAT32> weights;
-  InternalTensor<Linear, DataType::FLOAT32> bias; // optional
-};
-
-Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr);
-
-Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr);
-
-// @return shape of a tensor after ConvolutionTransposed operation is applied to
-//         the given input.
-BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr);
-
-// @return shape of a tensor after ConvolutionTransposed3D operation is applied
-// to
-//         the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr);
-
-struct DepthwiseConvolution2DAttributes : public Convolution2DAttributes
-{
-};
-struct DepthwiseConvolution3DAttributes : public Convolution3DAttributes
-{
-};
-
-// @return shape of a tensor after DepthwiseConvolution2D operation is applied
-//         to the given input.
-BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr);
-
-// @return shape of a tensor after DepthwiseConvolution3D operation is applied
-//         to the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr);
-
-// @return padding for depthwise convolution operation to make sure output keep
-// the same shape as the given input.
-Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr);
-
-// @return padding for depthwise convolution operation to make sure output keep
-// the same shape as the given input.
-Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr);
-
-// f(x):= {
-//   if x < 0  : x -> alpha * x
-//   if x >= 0 : x -> min(clip, x)
-// }
-//
-// Examples:
-//   - ReLU: clip = 0, alpha = 0
-//   - ReLU6: clip = 6, alpha = 0
-//   - Leaky ReLU: clip = 0, alpha = a
-struct ReLUAttributes
-{
-  // clip <= 0 mean it is not set.
-  float clip = 0;
-
-  float alpha = 0;
-};
-
-struct PReLUAttributes
-{
-  // clip <= 0 mean it is not set.
-  float clip = 0;
-
-  // If alpha is linear, then it is sharded across CHANNELS axis, otherwise
-  // full shape alpha is required.
-  absl::variant<InternalTensor<Linear, DataType::FLOAT32>, InternalTensor<HWC, DataType::FLOAT32>>
-    alpha;
-};
-
-struct ReduceAttributes
-{
-  Axis axis = Axis::UNKNOWN;
-};
-
-struct SoftmaxAttributes
-{
-  Axis axis = Axis::UNKNOWN;
-};
-
-enum LstmKernelType
-{
-  FULL = 0,
-  BASIC = 1, // Currently, only basic is supported.
-};
-
-struct LstmAttributes
-{
-  LstmKernelType kernel_type = LstmKernelType::BASIC;
-};
-
-enum class SamplingType
-{
-  UNKNOWN = 0,
-  NEAREST = 1,
-  BILINEAR = 2,
-};
-
-struct Resize2DAttributes
-{
-  HW new_shape;
-
-  SamplingType type = SamplingType::UNKNOWN;
-
-  // If true, the centers of the 4 corner pixels of the input and output tensors
-  // are aligned, preserving the values at the corner pixels. Defaults to false.
-  bool align_corners = false;
-
-  bool half_pixel_centers = false;
-};
-
-// TODO(b/147771327): rename to Resize3D
-struct Resize3DAttributes
-{
-  HWD new_shape;
-
-  SamplingType type = SamplingType::NEAREST;
-
-  // If true, the centers of the 8 corner pixels of the input and output tensors
-  // are aligned, preserving the values at the corner pixels. Defaults to false.
-  bool align_corners = false;
-
-  bool half_pixel_centers = false;
-};
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr);
-
-float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr);
-
-// @return shape of a tensor after scale operation is applied to the given
-// input.
-BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr);
-
-// @return shape of a tensor after scale operation is applied to the given
-// input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr);
-
-enum class PaddingContentType
-{
-  ZEROS = 0,
-  REFLECT = 1,
-  EDGE = 2,
-};
-
-struct PadAttributes
-{
-  PaddingContentType type = PaddingContentType::ZEROS;
-
-  BHWC prepended;
-  BHWC appended;
-};
-
-// @return shape of a tensor after Pad operation is applied to the given input.
-BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr);
-
-struct Pad3DAttributes
-{
-  PaddingContentType type = PaddingContentType::ZEROS;
-
-  BHWDC prepended;
-  BHWDC appended;
-};
-
-// @return shape of a tensor after Pad3D operation is applied to the given
-// input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr);
-
-struct ConstTensorAttributes
-{
-  InternalTensor<BHWC, DataType::FLOAT32> tensor;
-};
-
-// Simple slicing without advanced support for shrinking, reverse slicing etc.
-struct SliceAttributes
-{
-  // Specifies start and end dimensions for slicing.
-  BHWC starts;
-  BHWC ends;
-
-  // Stride should be >= 1.
-  BHWC strides;
-};
-
-// @return shape of a tensor after Slice2D operation is applied to the given
-//         input.
-BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr);
-
-// Simple slicing without advanced support for shrinking, reverse slicing etc.
-struct Slice3DAttributes
-{
-  // Specifies start and end dimensions for slicing.
-  BHWDC starts;
-  BHWDC ends;
-
-  // Stride should be >= 1.
-  BHWDC strides;
-};
-
-// @return shape of a tensor after Slice3D operation is applied to the given
-//         input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr);
-
-struct FullyConnectedAttributes
-{
-  InternalTensor<OHWI, DataType::FLOAT32> weights;
-  InternalTensor<Linear, DataType::FLOAT32> bias;
-};
-
-// @return shape of a tensor after FullyConnected operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr);
-
-// @return shape of a tensor after Mean operation is applied to the given input.
-BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr);
-
-struct ElementwiseAttributes
-{
-  TensorOrScalar param;
-  // For elementwise operation with 2 inputs op(A, B), runtime_tensor_is_second
-  // true when runtime tensor is B(on second position). this is important for
-  // ops that non commutative, for example substract.
-  bool runtime_tensor_is_second = false;
-};
-
-struct ReshapeAttributes
-{
-  BHWC new_shape;
-};
-
-struct Reshape3DAttributes
-{
-  BHWDC new_shape;
-};
-
-struct TransposeAttributes
-{
-  // A permutation of the dimensions of input tensor
-  BHWC perm;
-};
-
-// @return shape of a tensor after Transpose operation is applied to
-// the given input.
-BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr);
-
-struct Transpose3DAttributes
-{
-  // A permutation of the dimensions of input tensor
-  BHWDC perm;
-};
-
-// @return shape of a tensor after Transpose3D operation is applied to
-// the given input.
-BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr);
-
-struct SpaceToDepthAttributes
-{
-  int block_size;
-};
-
-// These help perform a combination of Quantize & Dequantize to adjust float
-// values like quantized inference would.
-struct QuantizeAndDequantizeAttributes
-{
-  float min = 0;
-  float max = 0;
-  float scale = 0;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc b/runtime/onert/backend/gpu_cl/open_cl/Precision.cc
deleted file mode 100644 (file)
index bd908bd..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string ToString(CalculationsPrecision precision)
-{
-  switch (precision)
-  {
-    case CalculationsPrecision::F32_F16:
-      return "CalculationsPrecision::F32_F16";
-    case CalculationsPrecision::F32:
-      return "CalculationsPrecision::F32";
-    case CalculationsPrecision::F16:
-      return "CalculationsPrecision::F16";
-  }
-  return " ";
-}
-
-DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision)
-{
-  if (precision == CalculationsPrecision::F32)
-  {
-    return DataType::FLOAT32;
-  }
-  else
-  {
-    return DataType::FLOAT16;
-  }
-  return DataType::UNKNOWN;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.h b/runtime/onert/backend/gpu_cl/open_cl/Precision.h
deleted file mode 100644 (file)
index cb910c7..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__
-
-#include <string>
-
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class CalculationsPrecision
-{
-  F32,
-  F32_F16,
-  F16
-};
-// F32 - all data and all math ops in F32
-// F16 - all data and all math ops in F16
-// F32_F16 - as F16, but some operations (Convolution,
-// DepthwiseConvolution, FullyConnected, ConvolutionTransposed)
-// have accumulator in F32 and usually it calculates 4 mads in F16, sum them,
-// than converts this partial sum to F32 and add to accumulator.
-
-DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision);
-
-std::string ToString(CalculationsPrecision precision);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc
deleted file mode 100644 (file)
index 350d7a1..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "ProgramCache.h"
-
-#include <cstdint>
-#include <string>
-
-#include "ClProgram.h"
-#include "Status.h"
-#include "Util.h"
-#include "farmhash.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ProgramCache::ProgramDescriptor::ProgramDescriptor(const std::string &code_text,
-                                                   const std::string &options,
-                                                   bool use_fingerprints)
-  : code(code_text), compiler_options(options), use_fingerprint(use_fingerprints)
-{
-  const uint64_t code_fingerprint = ::util::Fingerprint64(code);
-  const uint64_t options_fingerprint = ::util::Fingerprint64(compiler_options);
-  fingerprint = code_fingerprint + options_fingerprint;
-}
-
-ProgramCache::ProgramDescriptor::ProgramDescriptor(uint64_t fingerprints)
-  : fingerprint(fingerprints), use_fingerprint(true)
-{
-}
-
-ProgramCache::ProgramCache(ProgramCache &&program_cache)
-  : use_fingerprints_(program_cache.use_fingerprints_),
-    programs_(std::move(program_cache.programs_))
-{
-}
-
-ProgramCache &ProgramCache::operator=(ProgramCache &&program_cache)
-{
-  if (this != &program_cache)
-  {
-    use_fingerprints_ = program_cache.use_fingerprints_;
-    programs_ = std::move(program_cache.programs_);
-  }
-  return *this;
-}
-
-absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code,
-                                               const std::string &function_name,
-                                               const std::vector<CompilerOptions> &compiler_options,
-                                               const CLContext &context, const CLDevice &device,
-                                               CLKernel *result)
-{
-  const std::string options = CompilerOptionsToString(device, compiler_options);
-  ProgramDescriptor desc{code, options, use_fingerprints_};
-  auto it = programs_.find(desc);
-  if (it != programs_.end())
-  {
-    return result->CreateFromProgram(it->second, function_name);
-  }
-
-  CLProgram program;
-  RETURN_IF_ERROR(CreateCLProgram(code, options, context, device, &program));
-  RETURN_IF_ERROR(result->CreateFromProgram(program, function_name));
-  programs_.insert(std::make_pair(std::move(desc), std::move(program)));
-  return absl::OkStatus();
-}
-
-absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code,
-                                               const std::string &function_name,
-                                               const CLContext &context, const CLDevice &device,
-                                               CLKernel *result)
-{
-  return GetOrCreateCLKernel(code, function_name, {}, context, device, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h
deleted file mode 100644 (file)
index 3f5ee02..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__
-
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "absl/container/flat_hash_map.h"
-#include "absl/types/span.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClKernel.h"
-#include "ClProgram.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ProgramCache
-{
-public:
-  ProgramCache() = default;
-
-  ProgramCache(ProgramCache &&program_cache);
-  ProgramCache &operator=(ProgramCache &&program_cache);
-  ProgramCache(const ProgramCache &) = delete;
-  ProgramCache &operator=(const ProgramCache &) = delete;
-
-  absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name,
-                                   const std::vector<CompilerOptions> &compiler_options,
-                                   const CLContext &context, const CLDevice &device,
-                                   CLKernel *result);
-
-  absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name,
-                                   const CLContext &context, const CLDevice &device,
-                                   CLKernel *result);
-
-private:
-  struct ProgramDescriptor
-  {
-    ProgramDescriptor() = default;
-    ProgramDescriptor(const std::string &code_text, const std::string &options,
-                      bool use_fingerprint);
-    explicit ProgramDescriptor(uint64_t fingerprint);
-
-    std::string code;
-    std::string compiler_options;
-    uint64_t fingerprint;
-    bool use_fingerprint;
-  };
-  struct ProgramDescriptorHasher
-  {
-    std::size_t operator()(const ProgramDescriptor &k) const
-    {
-      if (k.use_fingerprint)
-      {
-        return std::hash<uint64_t>()(k.fingerprint);
-      }
-      else
-      {
-        return std::hash<std::string>()(k.code) + std::hash<std::string>()(k.compiler_options);
-      }
-    }
-  };
-  struct ProgramDescriptorEqual
-  {
-    bool operator()(const ProgramDescriptor &a, const ProgramDescriptor &b) const
-    {
-      if (a.use_fingerprint && b.use_fingerprint)
-      {
-        return a.fingerprint == b.fingerprint;
-      }
-      else
-      {
-        return a.compiler_options == b.compiler_options && a.code == b.code;
-      }
-    }
-  };
-
-  // There is a low probability of a hash collision when cache is deserialized
-  // because only fingerprints are serialized instead of full source code.
-  bool use_fingerprints_ = false;
-  absl::flat_hash_map<ProgramDescriptor, CLProgram, ProgramDescriptorHasher, ProgramDescriptorEqual>
-    programs_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc b/runtime/onert/backend/gpu_cl/open_cl/Shape.cc
deleted file mode 100644 (file)
index 5a23745..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Shape.h"
-
-#include <stdint.h>
-
-#include <string>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/str_join.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-struct GetAxisByIndexFunc
-{
-  template <Layout T> Axis operator()() const { return GetAxis<T>(index); }
-  int32_t index;
-};
-
-struct GetIndexByAxisFunc
-{
-  template <Layout T> int operator()() const { return GetAxisIndex<T>(axis); }
-  Axis axis;
-};
-
-struct NumAxisFunc
-{
-  template <Layout T> int operator()() const { return Size<T>(); }
-};
-
-} // namespace
-
-std::string ToString(Axis axis)
-{
-  switch (axis)
-  {
-    case Axis::BATCH:
-      return "batch";
-    case Axis::CHANNELS:
-      return "channels";
-    case Axis::INPUT_CHANNELS:
-      return "input_channels";
-    case Axis::OUTPUT_CHANNELS:
-      return "output_channels";
-    case Axis::HEIGHT:
-      return "height";
-    case Axis::WIDTH:
-      return "width";
-    case Axis::VALUE:
-      return "value";
-    case Axis::DEPTH:
-      return "depth";
-    case Axis::UNKNOWN:
-      return "unknown";
-  }
-  return "undefined";
-}
-
-std::string ToString(Layout layout)
-{
-  switch (layout)
-  {
-    case Layout::SCALAR:
-      return "scalar";
-    case Layout::LINEAR:
-      return "linear";
-    case Layout::HW:
-      return "hw";
-    case Layout::HWD:
-      return "hwd";
-    case Layout::CHW:
-      return "chw";
-    case Layout::HWC:
-      return "hwc";
-    case Layout::HWDC:
-      return "hwdc";
-    case Layout::OHWI:
-      return "ohwi";
-    case Layout::IHWO:
-      return "ihwo";
-    case Layout::OIHW:
-      return "oihw";
-    case Layout::IOHW:
-      return "iohw";
-    case Layout::BHWC:
-      return "bhwc";
-    case Layout::BHWDC:
-      return "bhwdc";
-    case Layout::OHWDI:
-      return "ohwi";
-    case Layout::UNKNOWN:
-      return "unknown";
-  }
-  return "undefined";
-}
-
-Axis GetAxis(Layout layout, int32_t index)
-{
-  return DispatchByLayout(layout, GetAxisByIndexFunc{index});
-}
-
-int GetAxisIndex(Layout layout, Axis axis)
-{
-  return DispatchByLayout(layout, GetIndexByAxisFunc{axis});
-}
-
-bool HasAxis(Layout layout, Axis axis) { return GetAxisIndex(layout, axis) >= 0; }
-
-int Size(Layout layout) { return DispatchByLayout(layout, NumAxisFunc()); }
-
-std::string ToString(const Shape &s)
-{
-  return absl::StrCat("{", ToString(s.layout), ", {", absl::StrJoin(s.dimensions, ", "), "}}");
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.h b/runtime/onert/backend/gpu_cl/open_cl/Shape.h
deleted file mode 100644 (file)
index 3767e10..0000000
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include <array>
-#include <functional>
-#include <numeric>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class Axis
-{
-  UNKNOWN = 0,
-  CHANNELS = 1,
-  INPUT_CHANNELS = 2,
-  OUTPUT_CHANNELS = 3,
-  HEIGHT = 4,
-  WIDTH = 5,
-  BATCH = 6,
-  VALUE = 7,
-  DEPTH = 8,
-};
-
-std::string ToString(Axis t);
-
-// Layout represents axis order.
-enum class Layout
-{
-  UNKNOWN = 0,
-  SCALAR = 1,
-  LINEAR = 2,
-  HW = 3,
-  CHW = 4,
-  HWC = 5,
-  OIHW = 6,
-  OHWI = 7,
-  IHWO = 8,
-  IOHW = 9,
-  BHWC = 10,
-  HWDC = 11,
-  BHWDC = 12,
-  HWD = 13,
-  OHWDI = 14,
-};
-
-std::string ToString(Layout l);
-
-// Returns number of axis for the fixed layout.
-template <Layout T> constexpr int Size();
-
-// Returns number of axis for the given layout.
-int Size(Layout layout);
-
-// Returns Axis for the given index and fixed layout.
-template <Layout T> constexpr Axis GetAxis(int index);
-
-// Returns axis for the given layout and index.
-Axis GetAxis(Layout layout, int32_t index);
-
-// Returns axis index for the given axis and fixed layout.
-template <Layout T> constexpr int GetAxisIndex(Axis axis);
-
-// Returns axis index for the given layout and axis.
-int GetAxisIndex(Layout layout, Axis axis);
-
-// Checks if fixed layout has given axis
-template <Layout T> constexpr bool HasAxis(Axis axis);
-
-// Checks if given layout has given axis
-bool HasAxis(Layout layout, Axis axis);
-
-// Stores Layout(axis set and order) and value for dimensions.
-struct Shape
-{
-  Shape() : layout(Layout::UNKNOWN), dimensions() {}
-
-  explicit Shape(Layout t) : layout(t), dimensions(Size(t)) {}
-
-  Shape(Layout t, std::vector<int32_t> d) : layout(t), dimensions(std::move(d)) {}
-
-  bool operator==(const Shape &other) const
-  {
-    return (layout == other.layout) && (dimensions == other.dimensions);
-  }
-
-  bool operator!=(const Shape &other) const { return !operator==(other); }
-
-  // All methods below are matching same methods defined in StrongShape to
-  // make sure generic algorithms work both ways.
-
-  // Returns back a dimension or -1 if it is not found.
-  template <Axis D> int32_t get() const;
-  int32_t get(Axis axis) const;
-
-  template <Axis D> bool set(int32_t t);
-  bool set(Axis axis, int32_t t);
-
-  Axis axis(int index) const { return GetAxis(layout, index); }
-
-  int index(Axis axis) const { return GetAxisIndex(layout, axis); }
-
-  bool has(Axis axis) const { return HasAxis(layout, axis); }
-
-  int64_t DimensionsProduct() const
-  {
-    return std::accumulate(dimensions.begin(), dimensions.end(), 1ll, std::multiplies<int64_t>());
-  }
-
-  Layout layout = Layout::UNKNOWN;
-
-  std::vector<int32_t> dimensions;
-};
-
-std::string ToString(const Shape &s);
-
-// StrongShape provides convenient explicit access to dimensions stored in
-// shape, e.g. StrongShape<Layout::HW> s; provides s.h and s.w accessors.
-//
-// There is a conversion possible both ways between Shape and StrongShape.
-//
-//   OIHW oihw;  // specific shape
-//   Shape l = oihw.ToShape();
-//
-//   OHWI other;  // notice not the same but compatible shape.
-//   if (!other.Adopt(l)) {
-//     // error handling
-//   }
-//
-// StrongShape supports the following set of operations:
-//
-//   // Returns number of axis in the shape class.
-//   static constexpr int size();
-//
-//   // Returns Axis for the given index or Axis::UNKNOWN if index
-//   // falls outside of the defined range in this shape.
-//   static constexpr Axis axis(int index);
-//
-//   // Returns index for the given axis or -1 if axis is not defined in this
-//   // shape.
-//   static constexpr int index(Axis axis);
-//
-//   // Getters
-//   int32_t get(int index) const;
-//   int32_t get(Axis axis) const;
-//   int32_t get<Axis>() const;
-//
-//   // Setters that return false if set was not successful.
-//   bool set(int index, int32_t v);
-//   bool set(Axis axis, int32_t v);
-//   bool set<Axis>(int32_t v);
-//
-//   // Returns shape's layout.
-//   static const Layout layout;
-//
-//   // Turns specific shape into generic shape.
-//   Shape ToShape() const;
-//
-//   // Copies all dimensions from the given shape.
-//   bool Adopt(const Shape&);
-//
-template <Layout L> struct StrongShape;
-
-using Scalar = StrongShape<Layout::SCALAR>;
-using Linear = StrongShape<Layout::LINEAR>;
-using HW = StrongShape<Layout::HW>;
-using HWD = StrongShape<Layout::HWD>;
-
-// Common tensor shape for CNN models working with images.
-using CHW = StrongShape<Layout::CHW>;
-using HWC = StrongShape<Layout::HWC>;
-using HWDC = StrongShape<Layout::HWDC>;
-using BHWC = StrongShape<Layout::BHWC>;
-using BHWDC = StrongShape<Layout::BHWDC>;
-
-// Tensor shape used in convolution_2d weights.
-using OIHW = StrongShape<Layout::OIHW>;
-using OHWI = StrongShape<Layout::OHWI>;
-using IHWO = StrongShape<Layout::IHWO>;
-using IOHW = StrongShape<Layout::IOHW>;
-
-// Tensor shape used in convolution_3d weights.
-using OHWDI = StrongShape<Layout::OHWDI>;
-
-// -----------------------------------------------------------------------------
-// Everything below are internal implementation details.
-// -----------------------------------------------------------------------------
-
-namespace internal_shape
-{
-
-template <Axis T> struct AxisTraits;
-
-#define TFLITE_GPU_AXIS_TRAITS(AxisName, HolderName)    \
-  template <> struct AxisTraits<Axis::AxisName>         \
-  {                                                     \
-    struct Holder                                       \
-    {                                                   \
-      int32_t HolderName;                               \
-                                                        \
-    protected:                                          \
-      int32_t operator()() const { return HolderName; } \
-      void operator()(int32_t v) { HolderName = v; }    \
-    };                                                  \
-                                                        \
-    using dimension_holder_type = Holder;               \
-  }
-
-TFLITE_GPU_AXIS_TRAITS(CHANNELS, c);
-TFLITE_GPU_AXIS_TRAITS(HEIGHT, h);
-TFLITE_GPU_AXIS_TRAITS(WIDTH, w);
-TFLITE_GPU_AXIS_TRAITS(INPUT_CHANNELS, i);
-TFLITE_GPU_AXIS_TRAITS(OUTPUT_CHANNELS, o);
-TFLITE_GPU_AXIS_TRAITS(BATCH, b);
-TFLITE_GPU_AXIS_TRAITS(VALUE, v);
-TFLITE_GPU_AXIS_TRAITS(DEPTH, d);
-
-#undef TFLITE_GPU_AXIS_TRAITS
-
-template <int N, Axis... As> struct StrongShapeImpl;
-
-template <int N> struct StrongShapeImpl<N>
-{
-  static constexpr int size() { return N; }
-
-  static constexpr Axis axis(int) { return Axis::UNKNOWN; }
-
-  static constexpr int index(Axis) { return -1; }
-
-  static constexpr bool has(Axis) { return false; }
-
-  int32_t get(Axis) const { return -1; }
-
-  int32_t get(int) const { return -1; }
-
-  template <Axis B> int32_t get() const { return -1; }
-
-  bool set(Axis, int32_t) { return false; }
-
-  bool set(int, int32_t) { return false; }
-
-  template <Axis B> bool set(int32_t) { return false; }
-};
-
-// Used to deduce number of axis, and to be a child of a proper holder to
-// provide access to the dimension by name
-template <int N, Axis A, Axis... As>
-struct StrongShapeImpl<N, A, As...> : public AxisTraits<A>::dimension_holder_type,
-                                      public StrongShapeImpl<N + 1, As...>
-{
-  using dimension_holder_type = typename AxisTraits<A>::dimension_holder_type;
-
-  using rest_type = StrongShapeImpl<N + 1, As...>;
-
-  StrongShapeImpl() : dimension_holder_type{0}, rest_type() {}
-
-  template <typename... Ts>
-  explicit StrongShapeImpl(int32_t t, Ts... ts) : dimension_holder_type{t}, rest_type(ts...)
-  {
-  }
-
-  static constexpr Axis axis(int index) { return index == N ? A : rest_type::axis(index); }
-
-  static constexpr int index(Axis axis) { return axis == A ? N : rest_type::index(axis); }
-
-  static constexpr bool has(Axis axis) { return axis == A ? true : rest_type::has(axis); }
-
-  int32_t get(Axis axis) const
-  {
-    return axis == A ? dimension_holder_type::operator()() : rest_type::get(axis);
-  }
-
-  template <Axis B> int32_t get() const
-  {
-    return B == A ? dimension_holder_type::operator()() : rest_type::template get<B>();
-  }
-
-  int32_t get(int index) const
-  {
-    return index == N ? dimension_holder_type::operator()() : rest_type::get(index);
-  }
-
-  bool set(Axis axis, int32_t t)
-  {
-    if (axis == A)
-    {
-      dimension_holder_type::operator()(t);
-      return true;
-    }
-    return rest_type::set(axis, t);
-  }
-
-  bool set(int index, int32_t t)
-  {
-    if (index == N)
-    {
-      dimension_holder_type::operator()(t);
-      return true;
-    }
-    return rest_type::set(index, t);
-  }
-
-  template <Axis B> bool set(int32_t t)
-  {
-    if (A == B)
-    {
-      dimension_holder_type::operator()(t);
-      return true;
-    }
-    return rest_type::template set<B>(t);
-  }
-};
-
-template <Layout T> struct LayoutTraits;
-
-#define TFLITE_GPU_LAYOUT_TRAITS(LayoutName, ...)              \
-  template <> struct LayoutTraits<Layout::LayoutName>          \
-  {                                                            \
-    using strong_shape_type = StrongShapeImpl<0, __VA_ARGS__>; \
-  }
-
-TFLITE_GPU_LAYOUT_TRAITS(HW, Axis::HEIGHT, Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(HWD, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH);
-TFLITE_GPU_LAYOUT_TRAITS(OHWI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH,
-                         Axis::INPUT_CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(OIHW, Axis::OUTPUT_CHANNELS, Axis::INPUT_CHANNELS, Axis::HEIGHT,
-                         Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(IOHW, Axis::INPUT_CHANNELS, Axis::OUTPUT_CHANNELS, Axis::HEIGHT,
-                         Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(IHWO, Axis::INPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH,
-                         Axis::OUTPUT_CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(CHW, Axis::CHANNELS, Axis::HEIGHT, Axis::WIDTH);
-TFLITE_GPU_LAYOUT_TRAITS(HWC, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(HWDC, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(LINEAR, Axis::VALUE);
-TFLITE_GPU_LAYOUT_TRAITS(SCALAR, Axis::VALUE);
-TFLITE_GPU_LAYOUT_TRAITS(BHWC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(BHWDC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH,
-                         Axis::CHANNELS);
-TFLITE_GPU_LAYOUT_TRAITS(OHWDI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH,
-                         Axis::INPUT_CHANNELS);
-
-#undef TFLITE_GPU_LAYOUT_TRAITS
-
-template <> struct LayoutTraits<Layout::UNKNOWN>
-{
-  using strong_shape_type = StrongShapeImpl<0>;
-};
-
-template <Axis A> struct DimensionGetterFixedAxisFunc
-{
-  template <Layout T> int32_t operator()() const
-  {
-    constexpr int i = GetAxisIndex<T>(A);
-    return i >= 0 && i < l->dimensions.size() ? l->dimensions[i] : -1;
-  }
-  const Shape *l;
-};
-
-struct DimensionGetterFunc
-{
-  template <Layout T> int32_t operator()() const
-  {
-    uint32_t i = GetAxisIndex<T>(axis);
-    return i < l->dimensions.size() ? l->dimensions[i] : -1;
-  }
-  Axis axis;
-  const Shape *l;
-};
-
-template <Axis A> struct DimensionSetterFixedAxisFunc
-{
-  template <Layout T> bool operator()() const
-  {
-    constexpr uint32_t i = GetAxisIndex<T>(A);
-    if (i < l->dimensions.size())
-    {
-      l->dimensions[i] = v;
-      return true;
-    }
-    return false;
-  }
-  Shape *l;
-  int32_t v;
-};
-
-struct DimensionSetterFunc
-{
-  template <Layout T> bool operator()() const
-  {
-    uint32_t i = GetAxisIndex<T>(axis);
-    if (i < l->dimensions.size())
-    {
-      l->dimensions[i] = v;
-      return true;
-    }
-    return false;
-  }
-  Axis axis;
-  Shape *l;
-  int32_t v;
-};
-
-template <Layout L> struct ToShapeFunc
-{
-  template <Layout T> bool operator()() const
-  {
-    for (int i = 0; i < StrongShape<L>::size(); ++i)
-    {
-      int index = GetAxisIndex<T>(StrongShape<L>::axis(i));
-      if (index < 0)
-        return false;
-      shape->set(i, l.dimensions[index]);
-    }
-    return true;
-  }
-
-  StrongShape<L> *shape;
-  const Shape &l;
-};
-
-} // namespace internal_shape
-
-// template <Axis... As>
-template <Layout L> struct StrongShape : public internal_shape::LayoutTraits<L>::strong_shape_type
-{
-  using strong_shape_type = typename internal_shape::LayoutTraits<L>::strong_shape_type;
-  StrongShape() = default;
-
-  template <typename... Ts> explicit StrongShape(Ts... t) : strong_shape_type(t...) {}
-
-  constexpr static Layout layout = L;
-
-  bool operator==(const StrongShape<L> &shape) const
-  {
-    // TODO(akulik): implement better alternative.
-    return this->ToShape() == shape.ToShape();
-  }
-
-  bool operator!=(const StrongShape<L> &shape) const
-  {
-    // TODO(akulik): implement better alternative.
-    return this->ToShape() != shape.ToShape();
-  }
-  bool empty() const { return DimensionsProduct() == 0; }
-
-  // Turns StrongShape into generic shape.
-  Shape ToShape() const
-  {
-    std::vector<int32_t> dimensions(StrongShape::size());
-    for (int i = 0; i < StrongShape::size(); ++i)
-    {
-      dimensions[i] = StrongShape::get(i);
-    }
-    return Shape(L, std::move(dimensions));
-  }
-
-  // @return all dimensions multiplied
-  int64_t DimensionsProduct() const
-  {
-    int64_t product = 1;
-    for (int i = 0; i < StrongShape::size(); ++i)
-    {
-      product *= StrongShape::get(i);
-    }
-    return product;
-  }
-
-  // Translates given coordinates of the layout into a linear index assuming
-  // dimensions are sorted in tensor access order e.g. if you access
-  // foobar[i][j][k] order of coordinates should be i,j,k.
-  int64_t LinearIndex(const std::array<int32_t, StrongShape::size()> &coordinates) const
-  {
-    int64_t index = coordinates[0];
-    for (int i = 1; i < StrongShape::size(); ++i)
-    {
-      index = index * StrongShape::get(i) + coordinates[i];
-    }
-    return index;
-  }
-
-  // Copies all dimensions from the given generic shape into specific shape.
-  // It requires shape to have all axis defined in the given
-  // StrongShape. For example:
-  //   - If this shape is OHWI but given shape is OIHW, Adopt will copy all
-  //     dimensions and return true.
-  //   - If this shape is OIHW but input shape is HW, Adopt will copy H and W
-  //     dimensions and return true, but if this shape is HW and given shape
-  //     OIHW, then Adopt will return false because not all axis are present in
-  //     the input shape.
-  //
-  // @return false if generic shape is not compatible.
-  bool Adopt(const Shape &shape)
-  {
-    return DispatchByLayout(shape.layout, internal_shape::ToShapeFunc<L>{this, shape});
-  }
-
-  // For all axis defined in a given shape copies values to this shape.
-  // Therefore, it is possible to copy dimensions from CHW to BCHW, but not
-  // the other way around.
-  //
-  // BCHW bchw;
-  // CHW chw;
-  // bchw.CopyAllGivenAxis(chw);  --> true
-  // chw.CopyAllGivenAxis(bchw);  --> false
-  //
-  // @return false if axis in source shape is not defined here, thus value
-  //         was not copied.
-  template <Layout B> bool CopyAllGivenAxis(const StrongShape<B> &source)
-  {
-    for (int i = 0; i < source.size(); ++i)
-    {
-      if (!StrongShape::set(source.axis(i), source.get(i)))
-      {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  // For all axis defined in this shape copies values from the given shape.
-  //
-  // BCHW bchw;
-  // CHW chw;
-  // bchw.CopyAllDefinedAxis(chw);  --> false
-  // chw.CopyAllDefinedAxis(bchw);  --> true
-  //
-  // @return false if given shape does not have axis defined here,
-  //         therefore a value was not copied.
-  template <Layout B> bool CopyAllDefinedAxis(const StrongShape<B> &source)
-  {
-    for (int i = 0; i < StrongShape::size(); ++i)
-    {
-      int source_index = source.index(StrongShape::axis(i));
-      if (source_index < 0)
-      {
-        return false;
-      }
-      StrongShape::set(i, source.get(source_index)); // always true
-    }
-    return true;
-  }
-
-  // Copies values only for matching axis.
-  template <Layout B> void CopyMatchingAxis(const StrongShape<B> &source)
-  {
-    for (int i = 0; i < StrongShape::size(); ++i)
-    {
-      StrongShape::set(source.axis(i), source.get(i));
-    }
-  }
-
-  // AbslHash function for using in flat hash containers.
-  template <typename H> friend H AbslHashValue(H hash_state, const StrongShape &strong_shape)
-  {
-    for (size_t i = 0; i < strong_shape.size(); ++i)
-    {
-      hash_state = H::combine(std::move(hash_state), strong_shape.get(i));
-    }
-    return hash_state;
-  }
-};
-
-template <Layout T> inline std::string ToString(const StrongShape<T> &s)
-{
-  return ToString(s.ToShape());
-}
-
-template <Layout L> constexpr Layout StrongShape<L>::layout;
-
-template <class F>
-auto DispatchByLayout(Layout type, F f) -> decltype(f.template operator()<Layout::UNKNOWN>())
-{
-  switch (type)
-  {
-    case Layout::HW:
-      return f.template operator()<Layout::HW>();
-    case Layout::HWD:
-      return f.template operator()<Layout::HWD>();
-    case Layout::HWC:
-      return f.template operator()<Layout::HWC>();
-    case Layout::HWDC:
-      return f.template operator()<Layout::HWDC>();
-    case Layout::CHW:
-      return f.template operator()<Layout::CHW>();
-    case Layout::OIHW:
-      return f.template operator()<Layout::OIHW>();
-    case Layout::IOHW:
-      return f.template operator()<Layout::IOHW>();
-    case Layout::OHWI:
-      return f.template operator()<Layout::OHWI>();
-    case Layout::IHWO:
-      return f.template operator()<Layout::IHWO>();
-    case Layout::LINEAR:
-      return f.template operator()<Layout::LINEAR>();
-    case Layout::SCALAR:
-      return f.template operator()<Layout::SCALAR>();
-    case Layout::BHWC:
-      return f.template operator()<Layout::BHWC>();
-    case Layout::BHWDC:
-      return f.template operator()<Layout::BHWDC>();
-    case Layout::OHWDI:
-      return f.template operator()<Layout::OHWDI>();
-    case Layout::UNKNOWN:
-      return f.template operator()<Layout::UNKNOWN>();
-  }
-  return f.template operator()<Layout::UNKNOWN>();
-}
-
-template <Layout T> constexpr int Size() { return StrongShape<T>::size(); }
-
-template <Layout T> constexpr Axis GetAxis(int index) { return StrongShape<T>::axis(index); }
-
-template <Layout T> constexpr int GetAxisIndex(Axis axis) { return StrongShape<T>::index(axis); }
-
-template <Layout T> constexpr bool HasAxis(Axis axis) { return StrongShape<T>::has(axis); }
-
-template <Axis D> inline int32_t Shape::get() const
-{
-  return DispatchByLayout(layout, internal_shape::DimensionGetterFixedAxisFunc<D>{this});
-}
-
-inline int32_t Shape::get(Axis axis) const
-{
-  return DispatchByLayout(layout, internal_shape::DimensionGetterFunc{axis, this});
-}
-
-template <Axis D> inline bool Shape::set(int32_t t)
-{
-  return DispatchByLayout(layout, internal_shape::DimensionSetterFixedAxisFunc<D>{this, t});
-}
-
-inline bool Shape::set(Axis axis, int32_t t)
-{
-  return DispatchByLayout(layout, internal_shape::DimensionSetterFunc{axis, this, t});
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Spi.h b/runtime/onert/backend/gpu_cl/open_cl/Spi.h
deleted file mode 100644 (file)
index c1d65b6..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__
-
-#include <cstdint>
-
-#include "Api.h"
-#include "AccessType.h"
-#include "Status.h"
-
-// Contains only service provider-related interfaces. Users should not use them
-// directly.
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// Converts a tensor object into another one.
-class TensorObjectConverter
-{
-public:
-  virtual ~TensorObjectConverter() = default;
-
-  virtual absl::Status Convert(const TensorObject &input, const TensorObject &output) = 0;
-};
-
-class TensorObjectConverterBuilder
-{
-public:
-  virtual ~TensorObjectConverterBuilder() = default;
-
-  virtual bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const = 0;
-
-  virtual absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output,
-                                     std::unique_ptr<TensorObjectConverter> *converter) = 0;
-};
-
-// Connects tensor definition provided by a user (external) with tensor
-// definition used by the inference engine (internal).
-struct TensorTieDef
-{
-  uint32_t id;
-  AccessType access_type;
-  TensorObjectDef internal_def;
-  TensorObjectDef external_def;
-};
-
-// Connects external tensor object to internal tensor object and provides
-// functionality to copy data to/from external object to internal.
-class TensorTie
-{
-public:
-  explicit TensorTie(const TensorTieDef &def) : def_(def) {}
-
-  virtual ~TensorTie() = default;
-
-  virtual absl::Status SetExternalObject(TensorObject obj) = 0;
-
-  virtual TensorObject GetExternalObject() = 0;
-
-  virtual absl::Status CopyToExternalObject() = 0;
-
-  virtual absl::Status CopyFromExternalObject() = 0;
-
-  const TensorTieDef &def() const { return def_; }
-
-private:
-  const TensorTieDef def_;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Status.h b/runtime/onert/backend/gpu_cl/open_cl/Status.h
deleted file mode 100644 (file)
index 6295a7e..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__
-
-#include "absl/status/status.h" // IWYU pragma: export
-#define RETURN_IF_ERROR(s) \
-  {                        \
-    auto c = (s);          \
-    if (!c.ok())           \
-      return c;            \
-  } // IWYU pragma: export
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc
deleted file mode 100644 (file)
index eada697..0000000
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "StorageTypeUtil.h"
-
-#include "TensorType.h"
-#include "DataType.h"
-#include "Shape.h"
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape,
-                              const TensorDescriptor &descriptor)
-{
-  const int slices = DivideRoundUp(shape.c, 4);
-  switch (descriptor.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    {
-      const uint64_t flt4_size = 4 * (descriptor.data_type == DataType::FLOAT32 ? 4 : 2);
-      const uint64_t buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
-      return buffer_size <= device_info.buffer_max_size;
-    }
-    case TensorStorageType::IMAGE_BUFFER:
-      return (uint64_t)shape.b * shape.w * shape.h * shape.d * slices <=
-             device_info.image_buffer_max_size;
-    case TensorStorageType::TEXTURE_3D:
-      if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1)
-      {
-        // clCreateImage3D (that used in CL 1.0/1.1) can not create image with
-        // depth = 1 by specification;
-        return false;
-      }
-      return (uint64_t)shape.w * shape.b <= device_info.image3d_max_width &&
-             (uint64_t)shape.h <= device_info.image3d_max_height &&
-             (uint64_t)slices * shape.d <= device_info.image3d_max_depth;
-    case TensorStorageType::TEXTURE_ARRAY:
-      // Bug on some Adreno. b/131099086
-      if (slices == 1 && !device_info.SupportsOneLayerTextureArray())
-      {
-        return false;
-      }
-      return (uint64_t)shape.w * shape.b <= device_info.image2d_max_width &&
-             (uint64_t)shape.h <= device_info.image2d_max_height &&
-             (uint64_t)slices * shape.d <= device_info.image_array_max_layers;
-    case TensorStorageType::TEXTURE_2D:
-      return (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
-             (uint64_t)shape.h * slices <= device_info.image2d_max_height;
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return (uint64_t)shape.c <= 4 &&
-             device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
-             (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
-             (uint64_t)shape.h <= device_info.image2d_max_height;
-    default:
-      return false;
-  }
-}
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape,
-                              const TensorDescriptor &descriptor)
-{
-  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
-  return CanCreateTensorWithShape(device_info, shape5D, descriptor);
-}
-
-TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape,
-                                        const TensorStorageType &desired, const DataType &data_type,
-                                        const Layout &layout)
-{
-  if (CanCreateTensorWithShape(device_info, shape, TensorDescriptor{data_type, desired, layout}))
-  {
-    return desired;
-  }
-  auto GetBestTypeAfterTextureArray = [&]() {
-    if (device_info.SupportsImageBuffer() &&
-        CanCreateTensorWithShape(
-          device_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout}))
-    {
-      return TensorStorageType::IMAGE_BUFFER;
-    }
-    else
-    {
-      return TensorStorageType::BUFFER;
-    }
-  };
-  auto GetBestTypeAfterTexture2D = [&]() {
-    if (device_info.SupportsTextureArray() &&
-        CanCreateTensorWithShape(
-          device_info, shape,
-          TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout}))
-    {
-      return TensorStorageType::TEXTURE_ARRAY;
-    }
-    else
-    {
-      return GetBestTypeAfterTextureArray();
-    }
-  };
-  auto GetBestTypeAfterTexture3D = [&]() {
-    if (CanCreateTensorWithShape(
-          device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout}))
-    {
-      return TensorStorageType::TEXTURE_2D;
-    }
-    else
-    {
-      return GetBestTypeAfterTexture2D();
-    }
-  };
-  switch (desired)
-  {
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return GetBestTypeAfterTexture2D();
-    case TensorStorageType::TEXTURE_ARRAY:
-      return GetBestTypeAfterTextureArray();
-    case TensorStorageType::TEXTURE_3D:
-      return GetBestTypeAfterTexture3D();
-    case TensorStorageType::IMAGE_BUFFER:
-    case TensorStorageType::BUFFER:
-      return TensorStorageType::BUFFER;
-    default:
-      return TensorStorageType::BUFFER;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h
deleted file mode 100644 (file)
index a84c386..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__
-
-#include "DeviceInfo.h"
-#include "TensorType.h"
-#include "DataType.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape,
-                              const TensorDescriptor &descriptor);
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape,
-                              const TensorDescriptor &descriptor);
-
-TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape,
-                                        const TensorStorageType &desired, const DataType &data_type,
-                                        const Layout &layout);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc b/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc
deleted file mode 100644 (file)
index 983e0d2..0000000
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Tensor.h"
-
-#include <cstring>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-
-#include "Buffer.h"
-#include "ClImageFormat.h"
-#include "ClMemory.h"
-#include "GpuObject.h"
-#include "TensorType.h"
-#include "InternalTensor.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape,
-                                  const TensorDescriptor &descriptor, const void *data_ptr,
-                                  CLMemory *result)
-{
-  const int slices = DivideRoundUp(shape.c, 4);
-  cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
-  if (data_ptr)
-  {
-    mem_flags |= CL_MEM_COPY_HOST_PTR;
-  }
-  switch (descriptor.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-    {
-      const size_t data_size =
-        shape.b * shape.w * shape.h * shape.d * slices * 4 * SizeOf(descriptor.data_type);
-      cl_int error_code;
-      cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
-                                     const_cast<void *>(data_ptr), &error_code);
-      if (!memory)
-      {
-        return absl::UnknownError(absl::StrCat(
-          "Failed to allocate device memory (clCreateBuffer): ", CLErrorCodeToString(error_code)));
-      }
-      *result = CLMemory(memory, true);
-      return absl::OkStatus();
-    }
-    case TensorStorageType::TEXTURE_2D:
-    {
-      cl_image_desc desc;
-      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-      desc.image_width = shape.w * shape.b * shape.d;
-      desc.image_height = shape.h * slices;
-      desc.image_depth = 0;
-      desc.image_row_pitch = 0;
-      desc.image_slice_pitch = 0;
-      desc.num_mip_levels = 0;
-      desc.num_samples = 0;
-      desc.buffer = nullptr;
-
-      cl_image_format format;
-      format.image_channel_order = CL_RGBA;
-      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
-      cl_int error_code;
-      cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
-                                          const_cast<void *>(data_ptr), &error_code);
-      if (error_code != CL_SUCCESS)
-      {
-        return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ",
-                                               CLErrorCodeToString(error_code)));
-      }
-
-      *result = CLMemory(memory, true);
-      return absl::OkStatus();
-    }
-    case TensorStorageType::TEXTURE_3D:
-    {
-      cl_image_desc desc;
-      desc.image_type = CL_MEM_OBJECT_IMAGE3D;
-      desc.image_width = shape.w * shape.b;
-      desc.image_height = shape.h;
-      desc.image_depth = slices * shape.d;
-      desc.image_row_pitch = 0;
-      desc.image_slice_pitch = 0;
-      desc.num_mip_levels = 0;
-      desc.num_samples = 0;
-      desc.buffer = nullptr;
-
-      cl_image_format format;
-      format.image_channel_order = CL_RGBA;
-      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
-      cl_int error_code;
-      cl_mem memory = CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
-                                          const_cast<void *>(data_ptr), &error_code);
-      if (error_code != CL_SUCCESS)
-      {
-        return absl::UnknownError(absl::StrCat("Failed to create 3D texture (clCreateImage): ",
-                                               CLErrorCodeToString(error_code)));
-      }
-
-      *result = CLMemory(memory, true);
-      return absl::OkStatus();
-    }
-    case TensorStorageType::TEXTURE_ARRAY:
-    {
-      cl_image_desc desc;
-      desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
-      desc.image_width = shape.w * shape.b;
-      desc.image_height = shape.h;
-      desc.image_depth = 0;
-      desc.image_array_size = slices * shape.d;
-      desc.image_row_pitch = 0;
-      desc.image_slice_pitch = 0;
-      desc.num_mip_levels = 0;
-      desc.num_samples = 0;
-      desc.buffer = nullptr;
-
-      cl_image_format format;
-      format.image_channel_order = CL_RGBA;
-      format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
-      cl_int error_code;
-      cl_mem memory = clCreateImage(context.context(), mem_flags, &format, &desc,
-                                    const_cast<void *>(data_ptr), &error_code);
-      if (error_code != CL_SUCCESS)
-      {
-        return absl::UnknownError(absl::StrCat(
-          "Failed to create 2D texture array (clCreateImage): ", CLErrorCodeToString(error_code)));
-      }
-
-      *result = CLMemory(memory, true);
-      return absl::OkStatus();
-    }
-
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    {
-      if (slices != 1)
-      {
-        return absl::InvalidArgumentError(absl::StrCat(
-          "SINGLE_TEXTURE_2D support only channels in range [1-4], but ", shape.c, "was provided"));
-      }
-      cl_image_desc desc;
-      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-      desc.image_width = shape.w * shape.b * shape.d;
-      desc.image_height = shape.h;
-      desc.image_depth = 0;
-      desc.image_row_pitch = 0;
-      desc.image_slice_pitch = 0;
-      desc.num_mip_levels = 0;
-      desc.num_samples = 0;
-      desc.buffer = nullptr;
-
-      cl_image_format format;
-      if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type))
-      {
-        format.image_channel_order = ToChannelOrder(shape.c);
-        format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-      }
-      else
-      {
-        return absl::InvalidArgumentError(
-          absl::StrCat("This device doesn't support ", shape.c, "-channel textures."));
-      }
-
-      cl_int error_code;
-      cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
-                                          const_cast<void *>(data_ptr), &error_code);
-      if (error_code != CL_SUCCESS)
-      {
-        return absl::UnknownError(absl::StrCat(
-          "Failed to create single 2D texture (clCreateImage): ", CLErrorCodeToString(error_code)));
-      }
-
-      *result = CLMemory(memory, true);
-      return absl::OkStatus();
-    }
-
-    default:
-      return absl::InternalError("Unsupported tensor storage type");
-  }
-}
-
-absl::Status CreateImageBufferFromBuffer(const CLContext &context, cl_mem memory,
-                                         DataType data_type, int width, cl_mem *result)
-{
-  cl_image_format format;
-  cl_image_desc desc;
-  std::memset(&desc, 0, sizeof(desc));
-  desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
-  desc.image_width = width;
-  desc.mem_object = memory;
-
-  format.image_channel_data_type = ToImageChannelType(data_type);
-  format.image_channel_order = CL_RGBA;
-
-  cl_int error_code;
-  *result =
-    clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc, nullptr, &error_code);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
-                          const TensorDescriptor &descriptor, cl_mem memory, Tensor *result)
-{
-  const bool memory_owner = memory == nullptr;
-  if (memory_owner)
-  {
-    CLMemory mem;
-    RETURN_IF_ERROR(AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
-    memory = mem.Release();
-  }
-  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER)
-  {
-    cl_mem image_memory;
-    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
-      context, memory, descriptor.data_type,
-      shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory));
-    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
-  }
-  else
-  {
-    *result = Tensor(memory, memory_owner, shape, descriptor);
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CreateTensorShared(const CLContext &context, const BHWDC &shape,
-                                const TensorDescriptor &descriptor, cl_mem memory, Tensor *result)
-{
-  const bool memory_owner = false;
-  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER)
-  {
-    cl_mem image_memory;
-    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
-      context, memory, descriptor.data_type,
-      shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory));
-    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
-  }
-  else
-  {
-    *result = Tensor(memory, memory_owner, shape, descriptor);
-  }
-  return absl::OkStatus();
-}
-
-} // namespace
-
-absl::Status TensorDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
-  Tensor gpu_tensor;
-  RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*this, context));
-  *result = absl::make_unique<Tensor>(std::move(gpu_tensor));
-  return absl::OkStatus();
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC &shape,
-               const TensorDescriptor &descriptor)
-  : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner),
-    shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape,
-               const TensorDescriptor &descriptor)
-  : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner), shape_(shape),
-    descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape,
-               const TensorDescriptor &descriptor)
-  : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner),
-    shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape,
-               const TensorDescriptor &descriptor)
-  : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner),
-    shape_(shape), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(Tensor &&tensor)
-  : memory_(tensor.memory_), image_buffer_memory_(tensor.image_buffer_memory_),
-    memory_owner_(tensor.memory_owner_), shape_(tensor.shape_), descriptor_(tensor.descriptor_)
-{
-  tensor.memory_ = nullptr;
-  tensor.image_buffer_memory_ = nullptr;
-}
-
-Tensor &Tensor::operator=(Tensor &&tensor)
-{
-  if (this != &tensor)
-  {
-    Release();
-    std::swap(memory_, tensor.memory_);
-    std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
-    std::swap(memory_owner_, tensor.memory_owner_);
-    std::swap(shape_, tensor.shape_);
-    std::swap(descriptor_, tensor.descriptor_);
-  }
-  return *this;
-}
-
-void Tensor::Release()
-{
-  // image_buffer_memory_ always owned by object
-  if (image_buffer_memory_)
-  {
-    clReleaseMemObject(image_buffer_memory_);
-    image_buffer_memory_ = nullptr;
-  }
-  if (memory_owner_ && memory_)
-  {
-    clReleaseMemObject(memory_);
-    memory_ = nullptr;
-  }
-}
-
-absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                     GPUResourcesWithValue *resources) const
-{
-  const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr);
-  if (buffer_desc)
-  {
-    if (descriptor_.storage_type != TensorStorageType::BUFFER)
-    {
-      return absl::InvalidArgumentError("Tensor can be used with BufferDescriptor only wtih "
-                                        "TensorStorageType::BUFFER.");
-    }
-    resources->buffers.push_back({"buffer", memory_});
-    return absl::OkStatus();
-  }
-  const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(obj_ptr);
-  if (!tensor_desc)
-  {
-    return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
-  }
-  if (descriptor_.HasAxis(Axis::WIDTH))
-  {
-    resources->ints.push_back({"width", Width()});
-    resources->ints.push_back({"width_div2", Width() / 2});
-    resources->ints.push_back({"width_div4", Width() / 4});
-    resources->ints.push_back({"width_batched", Width() * Batch()});
-    resources->ints.push_back({"width_batched_div2", Width() * Batch() / 2});
-    resources->ints.push_back({"width_batched_div4", Width() * Batch() / 4});
-  }
-  if (descriptor_.HasAxis(Axis::HEIGHT))
-  {
-    resources->ints.push_back({"height", Height()});
-  }
-  if (descriptor_.HasAxis(Axis::CHANNELS))
-  {
-    resources->ints.push_back({"slices", Slices()});
-    resources->ints.push_back({"channels", Channels()});
-  }
-  if (descriptor_.HasAxis(Axis::BATCH))
-  {
-    resources->ints.push_back({"batch", Batch()});
-  }
-  if (descriptor_.HasAxis(Axis::DEPTH))
-  {
-    resources->ints.push_back({"depth", Depth()});
-  }
-
-  if (descriptor_.storage_type == TensorStorageType::BUFFER)
-  {
-    resources->buffers.push_back({"buffer", memory_});
-  }
-  else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D ||
-           descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
-  {
-    resources->images2d.push_back({"image2d", memory_});
-  }
-  else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY)
-  {
-    resources->image2d_arrays.push_back({"image2d_array", memory_});
-  }
-  else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D)
-  {
-    resources->images3d.push_back({"image3d", memory_});
-  }
-  else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER)
-  {
-    if (obj_ptr->GetAccess() == AccessType::READ)
-    {
-      resources->image_buffers.push_back({"image_buffer", image_buffer_memory_});
-    }
-    else
-    {
-      resources->buffers.push_back({"buffer", memory_});
-    }
-  }
-
-  return absl::OkStatus();
-}
-
-int3 Tensor::GetFullTensorRegion() const
-{
-  switch (descriptor_.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::IMAGE_BUFFER:
-      return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()};
-    case TensorStorageType::TEXTURE_2D:
-      return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1};
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return {shape_.w * shape_.b * shape_.d, shape_.h, 1};
-    case TensorStorageType::UNKNOWN:
-      return {-1, -1, -1};
-  }
-  return {-1, -1, -1};
-}
-
-absl::Status Tensor::IsValid(const BHWC &shape) const
-{
-  if (shape.b != shape_.b)
-  {
-    return absl::InvalidArgumentError("Shape batch does not match tensor batch");
-  }
-  if (shape.w != shape_.w)
-  {
-    return absl::InvalidArgumentError("Shape width does not match tensor width");
-  }
-  if (shape.h != shape_.h)
-  {
-    return absl::InvalidArgumentError("Shape height does not match tensor height");
-  }
-  if (shape.c != shape_.c)
-  {
-    return absl::InvalidArgumentError("Shape channels does not match tensor channels");
-  }
-  return absl::OkStatus();
-}
-
-absl::Status Tensor::IsValid(const BHWDC &shape) const
-{
-  if (shape.b != shape_.b)
-  {
-    return absl::InvalidArgumentError("Shape batch does not match tensor batch");
-  }
-  if (shape.w != shape_.w)
-  {
-    return absl::InvalidArgumentError("Shape width does not match tensor width");
-  }
-  if (shape.h != shape_.h)
-  {
-    return absl::InvalidArgumentError("Shape height does not match tensor height");
-  }
-  if (shape.d != shape_.d)
-  {
-    return absl::InvalidArgumentError("Shape depth does not match tensor depth");
-  }
-  if (shape.c != shape_.c)
-  {
-    return absl::InvalidArgumentError("Shape channels does not match tensor channels");
-  }
-  return absl::OkStatus();
-}
-
-int Tensor::GetAlignedChannels() const
-{
-  return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape_.c
-                                                                          : AlignByN(shape_.c, 4);
-}
-
-uint64_t Tensor::GetMemorySizeInBytes() const
-{
-  const uint64_t flt_size = static_cast<uint64_t>(SizeOf(descriptor_.data_type));
-  const uint64_t flt4_size = 4 * flt_size;
-  switch (descriptor_.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-      return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices();
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d;
-    default:
-      return 0;
-  }
-}
-
-cl_mem Tensor::GetMemoryPtr() const
-{
-  return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER ? image_buffer_memory_
-                                                                     : memory_;
-}
-
-cl_mem Tensor::GetMemoryPtrForWriting() const { return memory_; }
-
-absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue)
-{
-  void *data_ptr = nullptr;
-  const int aligned_channels = GetAlignedChannels();
-  const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
-
-  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-  std::vector<float> data_f;
-  data_f.resize(elements_count);
-  data_ptr = data_f.data();
-  DataFromBHWDC(in, shape_, descriptor_, absl::MakeSpan(data_f.data(), data_f.size()));
-
-  switch (descriptor_.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      RETURN_IF_ERROR(queue->EnqueueWriteBuffer(memory_, data_size, data_ptr));
-      break;
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      RETURN_IF_ERROR(queue->EnqueueWriteImage(memory_, GetFullTensorRegion(), data_ptr));
-      break;
-    default:
-      return absl::InternalError("Unsupported tensor storage type");
-  }
-
-  return absl::OkStatus();
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue, const TensorFloat32 &src)
-{
-  RETURN_IF_ERROR(IsValid(src.shape));
-  return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue,
-                               const InternalTensor<Linear, DataType::FLOAT32> &src)
-{
-  return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue,
-                               const InternalTensor<HWC, DataType::FLOAT32> &src)
-{
-  return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src)
-{
-  RETURN_IF_ERROR(IsValid(src.shape));
-  return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue);
-}
-
-absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const
-{
-  void *data_ptr = nullptr;
-  const int aligned_channels = GetAlignedChannels();
-  const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
-  const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-
-  std::vector<float> data_f;
-  data_f.resize(elements_count);
-  data_ptr = data_f.data();
-  switch (descriptor_.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      RETURN_IF_ERROR(queue->EnqueueReadBuffer(memory_, data_size, data_ptr));
-      break;
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      RETURN_IF_ERROR(queue->EnqueueReadImage(memory_, GetFullTensorRegion(), data_ptr));
-      break;
-    default:
-      return absl::InternalError("Unsupported tensor storage type");
-  }
-
-  if (descriptor_.data_type == DataType::FLOAT32)
-  {
-    DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_, descriptor_, out);
-  }
-
-  return absl::OkStatus();
-}
-
-absl::Status Tensor::ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const
-{
-  RETURN_IF_ERROR(IsValid(dst->shape));
-  return ReadDataBHWDC(absl::MakeSpan(dst->data), queue);
-}
-
-absl::Status Tensor::ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const
-{
-  RETURN_IF_ERROR(IsValid(dst->shape));
-  return ReadDataBHWDC(absl::MakeSpan(dst->data), queue);
-}
-
-absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context)
-{
-  shape_ = desc.shape;
-  descriptor_.data_type = desc.data_type;
-  descriptor_.storage_type = desc.storage_type;
-  descriptor_.layout = desc.layout;
-  memory_owner_ = true;
-  CLMemory memory;
-  uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
-  RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
-  memory_ = memory.Release();
-  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER)
-  {
-    RETURN_IF_ERROR(CreateImageBufferFromBuffer(*context, memory_, desc.data_type,
-                                                shape_.b * shape_.w * shape_.h * shape_.d *
-                                                  DivideRoundUp(shape_.c, 4),
-                                                &image_buffer_memory_));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWC &shape,
-                          const TensorDescriptor &descriptor, Tensor *result)
-{
-  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
-  return CreateTensor(context, shape5D, descriptor, nullptr, result);
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
-                          const TensorDescriptor &descriptor, Tensor *result)
-{
-  return CreateTensor(context, shape, descriptor, nullptr, result);
-}
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape,
-                                const TensorDescriptor &descriptor, Tensor *result)
-{
-  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
-  return CreateTensorShared(context, shape5D, descriptor, memory, result);
-}
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape,
-                                const TensorDescriptor &descriptor, Tensor *result)
-{
-  return CreateTensorShared(context, shape, descriptor, memory, result);
-}
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape,
-                                  const TensorDescriptor &descriptor, CLMemory *result)
-{
-  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
-  return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
-}
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape,
-                                  const TensorDescriptor &descriptor, CLMemory *result)
-{
-  return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h b/runtime/onert/backend/gpu_cl/open_cl/Tensor.h
deleted file mode 100644 (file)
index b1930a4..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__
-
-#include <cstdint>
-#include <memory>
-
-#include "absl/types/span.h"
-#include "ClCommandQueue.h"
-#include "OpenclWrapper.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClMemory.h"
-#include "GpuObject.h"
-#include "TensorType.h"
-#include "Util.h"
-#include "DataType.h"
-#include "Shape.h"
-#include "Status.h"
-#include "InternalTensor.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class Tensor : public GPUObject
-{
-public:
-  Tensor() : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {}
-  Tensor(cl_mem memory, bool memory_owner, const BHWC &shape, const TensorDescriptor &descriptor);
-  Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape, const TensorDescriptor &descriptor);
-  Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape,
-         const TensorDescriptor &descriptor);
-  Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape,
-         const TensorDescriptor &descriptor);
-
-  // Move only
-  Tensor(Tensor &&tensor);
-  Tensor &operator=(Tensor &&tensor);
-  Tensor(const Tensor &) = delete;
-  Tensor &operator=(const Tensor &) = delete;
-
-  virtual ~Tensor() { Release(); }
-
-  absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                               GPUResourcesWithValue *resources) const override;
-
-  int Width() const { return shape_.w; }
-  int Height() const { return shape_.h; }
-  int Depth() const { return shape_.d; }
-  int Channels() const { return shape_.c; }
-  int Slices() const { return DivideRoundUp(shape_.c, 4); }
-  int Batch() const { return shape_.b; }
-  TensorDescriptor GetDescriptor() const { return descriptor_; }
-  DataType GetDataType() const { return descriptor_.data_type; }
-  TensorStorageType GetStorageType() const { return descriptor_.storage_type; }
-
-  // for profiling and memory statistics
-  uint64_t GetMemorySizeInBytes() const;
-
-  cl_mem GetMemoryPtr() const;
-
-  // This function returns buffer memory ptr for IMAGE_BUFFER instead of image
-  // memory ptr.
-  cl_mem GetMemoryPtrForWriting() const;
-
-  absl::Status WriteData(CLCommandQueue *queue, const TensorFloat32 &src);
-  absl::Status WriteData(CLCommandQueue *queue,
-                         const InternalTensor<Linear, DataType::FLOAT32> &src);
-  absl::Status WriteData(CLCommandQueue *queue, const InternalTensor<HWC, DataType::FLOAT32> &src);
-
-  absl::Status WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src);
-  absl::Status ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const;
-  absl::Status ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const;
-
-  absl::Status CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context);
-
-private:
-  absl::Status IsValid(const BHWC &shape) const;
-  absl::Status IsValid(const BHWDC &shape) const;
-
-  int GetChannelsAlignment() const;
-  int GetAlignedChannels() const;
-
-  absl::Status WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue);
-  absl::Status ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const;
-
-  int3 GetFullTensorRegion() const;
-  void Release();
-
-  cl_mem memory_;
-  cl_mem image_buffer_memory_; // for TensorStorageType::IMAGE_BUFFER only
-  bool memory_owner_;
-  BHWDC shape_;
-  TensorDescriptor descriptor_;
-};
-
-using TensorPtr = std::shared_ptr<Tensor>;
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape,
-                                  const TensorDescriptor &descriptor, CLMemory *result);
-
-absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape,
-                                  const TensorDescriptor &descriptor, CLMemory *result);
-
-absl::Status CreateTensor(const CLContext &context, const BHWC &shape,
-                          const TensorDescriptor &descriptor, Tensor *result);
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
-                          const TensorDescriptor &descriptor, Tensor *result);
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape,
-                                const TensorDescriptor &descriptor, Tensor *result);
-
-absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape,
-                                const TensorDescriptor &descriptor, Tensor *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc
deleted file mode 100644 (file)
index 7ede387..0000000
+++ /dev/null
@@ -1,1116 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorType.h"
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/substitute.h"
-#include "Shape.h"
-#include "DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetWriteImageFromDataType(DataType data_type)
-{
-  if (data_type == DataType::FLOAT32)
-  {
-    return "write_imagef";
-  }
-  else if (data_type == DataType::FLOAT16)
-  {
-    return "write_imageh";
-  }
-  else
-  {
-    throw std::runtime_error("Not supported data type");
-  }
-}
-
-} // namespace
-
-std::string TextureAddressModeToString(TextureAddressMode address_mode)
-{
-  switch (address_mode)
-  {
-    case TextureAddressMode::DONT_CARE:
-      return "smp_none";
-    case TextureAddressMode::ZERO:
-      return "smp_zero";
-  }
-  return "";
-}
-
-std::string ToString(TensorStorageType type)
-{
-  switch (type)
-  {
-    case TensorStorageType::UNKNOWN:
-      return "TensorStorageType::UNKNOWN";
-    case TensorStorageType::BUFFER:
-      return "TensorStorageType::BUFFER";
-    case TensorStorageType::TEXTURE_ARRAY:
-      return "TensorStorageType::TEXTURE_ARRAY";
-    case TensorStorageType::TEXTURE_2D:
-      return "TensorStorageType::TEXTURE_2D";
-    case TensorStorageType::TEXTURE_3D:
-      return "TensorStorageType::TEXTURE_3D";
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return "TensorStorageType::SINGLE_TEXTURE_2D";
-    case TensorStorageType::IMAGE_BUFFER:
-      return "TensorStorageType::IMAGE_BUFFER";
-  }
-  return "";
-}
-
-TensorDescriptor::TensorDescriptor(TensorDescriptor &&desc)
-  : GPUObjectDescriptor(std::move(desc)), data_type(desc.data_type),
-    storage_type(desc.storage_type), layout(desc.layout), shape(desc.shape),
-    data(std::move(desc.data))
-{
-}
-TensorDescriptor &TensorDescriptor::operator=(TensorDescriptor &&desc)
-{
-  if (this != &desc)
-  {
-    std::swap(data_type, desc.data_type);
-    std::swap(storage_type, desc.storage_type);
-    std::swap(layout, desc.layout);
-    std::swap(shape, desc.shape);
-    data = std::move(desc.data);
-    GPUObjectDescriptor::operator=(std::move(desc));
-  }
-  return *this;
-}
-
-GPUResources TensorDescriptor::GetGPUResources() const
-{
-  GPUResources resources;
-  if (HasAxis(Axis::WIDTH))
-  {
-    resources.ints.push_back("width");
-    resources.ints.push_back("width_div2");
-    resources.ints.push_back("width_div4");
-    resources.ints.push_back("width_batched");
-    resources.ints.push_back("width_batched_div2");
-    resources.ints.push_back("width_batched_div4");
-  }
-  if (HasAxis(Axis::HEIGHT))
-  {
-    resources.ints.push_back("height");
-  }
-  if (HasAxis(Axis::CHANNELS))
-  {
-    resources.ints.push_back("slices");
-    resources.ints.push_back("channels");
-  }
-  if (HasAxis(Axis::BATCH))
-  {
-    resources.ints.push_back("batch");
-  }
-  if (HasAxis(Axis::DEPTH))
-  {
-    resources.ints.push_back("depth");
-  }
-  if (storage_type == TensorStorageType::BUFFER)
-  {
-    GPUBufferDescriptor desc;
-    desc.data_type = data_type;
-    desc.access_type = access_type_;
-    desc.element_size = 4;
-    auto it1 = state_vars_.find("ElementsX2");
-    if (it1 != state_vars_.end() && it1->second == "true")
-    {
-      desc.element_size = 8;
-    }
-    auto it2 = state_vars_.find("ElementsX4");
-    if (it2 != state_vars_.end() && it2->second == "true")
-    {
-      desc.element_size = 16;
-    }
-    resources.buffers.push_back({"buffer", desc});
-  }
-  else if (storage_type == TensorStorageType::SINGLE_TEXTURE_2D ||
-           storage_type == TensorStorageType::TEXTURE_2D)
-  {
-    GPUImage2DDescriptor desc;
-    desc.data_type = data_type;
-    desc.access_type = access_type_;
-    resources.images2d.push_back({"image2d", desc});
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
-  {
-    GPUImage2DArrayDescriptor desc;
-    desc.data_type = data_type;
-    desc.access_type = access_type_;
-    resources.image2d_arrays.push_back({"image2d_array", desc});
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_3D)
-  {
-    GPUImage3DDescriptor desc;
-    desc.data_type = data_type;
-    desc.access_type = access_type_;
-    resources.images3d.push_back({"image3d", desc});
-  }
-  else if (storage_type == TensorStorageType::IMAGE_BUFFER)
-  {
-    if (access_type_ == AccessType::READ)
-    {
-      GPUImageBufferDescriptor desc;
-      desc.data_type = data_type;
-      desc.access_type = access_type_;
-      resources.image_buffers.push_back({"image_buffer", desc});
-    }
-    else
-    {
-      GPUBufferDescriptor desc;
-      desc.data_type = data_type;
-      desc.access_type = access_type_;
-      desc.element_size = 4;
-      resources.buffers.push_back({"buffer", desc});
-    }
-  }
-  return resources;
-}
-
-absl::Status TensorDescriptor::PerformSelector(const std::string &selector,
-                                               const std::vector<std::string> &args,
-                                               const std::vector<std::string> &template_args,
-                                               std::string *result) const
-{
-  if (selector == "Width")
-  {
-    *result = GetWidth();
-    return absl::OkStatus();
-  }
-  else if (selector == "Height")
-  {
-    *result = "height";
-    return absl::OkStatus();
-  }
-  else if (selector == "Slices")
-  {
-    *result = "slices";
-    return absl::OkStatus();
-  }
-  else if (selector == "SliceStride")
-  {
-    *result = GetSliceStride();
-    return absl::OkStatus();
-  }
-  else if (selector == "Channels")
-  {
-    *result = "channels";
-    return absl::OkStatus();
-  }
-  else if (selector == "Batch")
-  {
-    if (HasAxis(Axis::BATCH))
-    {
-      *result = "batch";
-    }
-    else
-    {
-      *result = "1";
-    }
-    return absl::OkStatus();
-  }
-  else if (selector == "Depth")
-  {
-    *result = "depth";
-    return absl::OkStatus();
-  }
-  else if (selector == "SetBatchRef")
-  {
-    if (args.size() != 1)
-    {
-      return absl::InvalidArgumentError("Unsupported arguments in SetBatchRef selector");
-    }
-    state_vars_["batch_id"] = args[0];
-    *result = "";
-    return absl::OkStatus();
-  }
-  else if (selector == "Read")
-  {
-    return PerformReadSelector(args, template_args, result);
-  }
-  else if (selector == "Write")
-  {
-    return PerformWriteSelector(args, result);
-  }
-  else if (selector == "WriteLinear")
-  {
-    return PerformWriteLinearSelector(args, result);
-  }
-  else if (selector == "GetAddress")
-  {
-    return PerformGetAddressSelector(args, result);
-  }
-  else if (selector == "GetPtrWithSliceOffset")
-  {
-    return PerformGetPtrWithSliceOffsetSelector(args, result);
-  }
-  else if (selector == "GetWHOffset")
-  {
-    return PerformGetWHOffsetSelector(args, result);
-  }
-  else if (selector == "GetHandle")
-  {
-    return PerformGetHandleSelector(args, result);
-  }
-  else
-  {
-    return absl::NotFoundError(
-      absl::StrCat("TensorDescriptor don't have selector with name - ", selector));
-  }
-}
-
-absl::Status TensorDescriptor::PerformReadSelector(const std::vector<std::string> &args,
-                                                   const std::vector<std::string> &template_args,
-                                                   std::string *result) const
-{
-  DataType read_as_type = data_type;
-  if (!template_args.empty())
-  {
-    if (template_args.size() != 1)
-    {
-      return absl::NotFoundError("Unrecognized Read selector template arguments.");
-    }
-    else
-    {
-      RETURN_IF_ERROR(GetDataTypeFromTemplateArgs(template_args[0], &read_as_type));
-    }
-  }
-  if (args.size() == 1)
-  { // function overload for 1D linear types.
-    if (storage_type == TensorStorageType::BUFFER ||
-        storage_type == TensorStorageType::IMAGE_BUFFER)
-    {
-      *result = Read(read_as_type, args[0]);
-      return absl::OkStatus();
-    }
-    else
-    {
-      return absl::InvalidArgumentError(
-        "Read selector with single argument can be used only with linear "
-        "storage types(BUFFER or IMAGE_BUFFER)");
-    }
-  }
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 2 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized Read selector");
-  }
-
-  *result = Read(read_as_type, GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::GetLinkingContextFromWriteSelector(
-  const std::vector<std::string> &args, std::string *value_name, std::string *x_coord,
-  std::string *y_coord, std::string *s_coord) const
-{
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 2 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized Write selector");
-  }
-  *value_name = args[0];
-  if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
-  {
-    *x_coord = absl::StrCat("((", xc, ") * batch + (", bc, "))");
-  }
-  else
-  {
-    *x_coord = absl::StrCat("(", xc, ")");
-  }
-  *y_coord = absl::StrCat("(", yc, ")");
-  *s_coord = absl::StrCat("(", sc, ")");
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformWriteSelector(const std::vector<std::string> &args,
-                                                    std::string *result) const
-{
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 2 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized Write selector");
-  }
-  *result = Write(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformWriteLinearSelector(const std::vector<std::string> &args,
-                                                          std::string *result) const
-{
-  if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER)
-  {
-    return absl::InvalidArgumentError("WriteLinear selector can be used only with linear "
-                                      "storages(BUFFER/IMAGE_BUFFER)");
-  }
-  if (args.size() != 2)
-  {
-    return absl::NotFoundError("Unrecognized WriteLinear selector");
-  }
-  *result = Write(args[0], "(" + args[1] + ")");
-  return absl::OkStatus();
-}
-
-std::string TensorDescriptor::Read(DataType read_as_type, const std::string &global_address) const
-{
-  const std::string read_as = read_as_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-  std::string image_type;
-  if (storage_type == TensorStorageType::TEXTURE_2D ||
-      storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
-  {
-    image_type = "image2d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_3D)
-  {
-    image_type = "image3d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
-  {
-    image_type = "image2d_array";
-  }
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-      if (read_as_type == data_type)
-      {
-        return absl::StrCat("buffer[", global_address, "]");
-      }
-      else
-      {
-        const std::string conversion =
-          read_as_type == DataType::FLOAT16 ? "convert_half4" : "convert_float4";
-        return absl::StrCat(conversion, "(buffer[", global_address, "])");
-      }
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    case TensorStorageType::TEXTURE_ARRAY:
-      return absl::StrCat(read_as, "(", image_type,
-                          ", " + TextureAddressModeToString(ModeFromState()) + ", ", global_address,
-                          ")");
-    case TensorStorageType::IMAGE_BUFFER:
-      return absl::StrCat(read_as, "(image_buffer, ", global_address, ")");
-    case TensorStorageType::UNKNOWN:
-      return "";
-  }
-  return "";
-}
-
-std::string TensorDescriptor::Write(const std::string &var_name,
-                                    const std::string &global_address) const
-{
-  std::string image_type;
-  if (storage_type == TensorStorageType::TEXTURE_2D ||
-      storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
-  {
-    image_type = "image2d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_3D)
-  {
-    image_type = "image3d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
-  {
-    image_type = "image2d_array";
-  }
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      return absl::StrCat("buffer[", global_address, "] = ", var_name, ";\n");
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    case TensorStorageType::TEXTURE_ARRAY:
-      return absl::StrCat(GetWriteImageFromDataType(data_type), "(", image_type, ", ",
-                          global_address, ", ", var_name, ");\n");
-    case TensorStorageType::UNKNOWN:
-      return "";
-  }
-  return "";
-}
-
-absl::Status TensorDescriptor::PerformGetAddressSelector(const std::vector<std::string> &args,
-                                                         std::string *result) const
-{
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 3 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized GetAddress selector");
-  }
-
-  *result = DeclareAddress(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
-  return absl::OkStatus();
-}
-
-absl::Status
-TensorDescriptor::PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args,
-                                                       std::string *result) const
-{
-  if (storage_type != TensorStorageType::BUFFER)
-  {
-    return absl::InvalidArgumentError(
-      "GetPtrWithSliceOffset selector can be used only with BUFFER");
-  }
-  if (args.size() != 1)
-  {
-    return absl::NotFoundError(
-      absl::StrCat("GetPtrWithSliceOffset require one argument(slice coordinate), but ",
-                   args.size(), " was passed"));
-  }
-  *result = absl::StrCat("buffer + ", args[0], " * ", GetSliceStride());
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformGetWHOffsetSelector(const std::vector<std::string> &args,
-                                                          std::string *result) const
-{
-  if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER)
-  {
-    return absl::InvalidArgumentError(
-      "GetWHOffset selector can be used only with BUFFER/IMAGE_BUFFER");
-  }
-  if (args.size() != 2)
-  {
-    return absl::NotFoundError(absl::StrCat(
-      "GetWHOffset require two arguments(X and Y coordinates), but ", args.size(), " was passed"));
-  }
-  if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
-  {
-    auto it = state_vars_.find("batch_id");
-    std::string batch_id;
-    if (it == state_vars_.end())
-    {
-      return absl::NotFoundError(
-        "Not found batch_id. Should be setted up by SetBatchRef(). method");
-    }
-    else
-    {
-      batch_id = it->second;
-    }
-    *result = absl::StrCat("((", args[1], ") * ", GetWidth(), " + (", args[0], ")) * batch + (",
-                           batch_id, ")");
-  }
-  else
-  {
-    *result = absl::StrCat("(", args[1], ") * ", GetWidth(), " + (", args[0], ")");
-  }
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformGetHandleSelector(const std::vector<std::string> &args,
-                                                        std::string *result) const
-{
-  if (!args.empty())
-  {
-    return absl::NotFoundError(
-      absl::StrCat("GetHandle does not require arguments, but ", args.size(), " was passed"));
-  }
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-      *result = "buffer";
-      return absl::OkStatus();
-    case TensorStorageType::IMAGE_BUFFER:
-      if (access_type_ == AccessType::READ)
-      {
-        *result = "image_buffer";
-      }
-      else
-      {
-        *result = "buffer";
-      }
-      return absl::OkStatus();
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      *result = "image2d";
-      return absl::OkStatus();
-    case TensorStorageType::TEXTURE_ARRAY:
-      *result = "image2d_array";
-      return absl::OkStatus();
-    case TensorStorageType::TEXTURE_3D:
-      *result = "image3d";
-      return absl::OkStatus();
-    case TensorStorageType::UNKNOWN:
-      return absl::UnavailableError("Unknown type");
-  }
-  return absl::UnavailableError("Unknown type");
-}
-
-std::string TensorDescriptor::DeclareAddress(const std::string &var_name,
-                                             const std::string &address) const
-{
-  return absl::StrCat(StorageTypeToAddressType(), " ", var_name, " = ", address, ";");
-}
-
-std::string TensorDescriptor::StorageTypeToAddressType() const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      return "int";
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return "int2";
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return "int4";
-    case TensorStorageType::UNKNOWN:
-      return "";
-  }
-  return "";
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHS(const std::string &x,
-                                                               const std::string &y,
-                                                               const std::string &s) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-    {
-      return absl::Substitute("((($2) * height + ($1)) * $3 + ($0))", x, y, s, GetWidth());
-    }
-    case TensorStorageType::TEXTURE_2D:
-      return absl::Substitute("(int2)(($0), ($1) * slices + ($2))", x, y, s);
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return absl::StrCat("(int2)(", x, ", ", y, ")");
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return absl::StrCat("(int4)(", x, ", ", y, ", ", s, ", 0)");
-    case TensorStorageType::UNKNOWN:
-      return "error";
-  }
-  return "error";
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHSB(const std::string &x,
-                                                                const std::string &y,
-                                                                const std::string &s,
-                                                                const std::string &b) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      return absl::Substitute("(((($3) * height + $2) * width + ($1)) * batch + ($0))", b, x, y, s);
-    case TensorStorageType::TEXTURE_2D:
-      return absl::Substitute("(int2)(($0) * batch + ($1), ($2) * slices + ($3))", x, b, y, s);
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return absl::Substitute("(int2)(($0) * batch + ($1), ($2))", x, b, y);
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3), 0)", x, b, y, s);
-    default:
-      throw std::runtime_error("Unknown storage type");
-  }
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDS(const std::string &x,
-                                                                const std::string &y,
-                                                                const std::string &z,
-                                                                const std::string &s) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-    {
-      return absl::Substitute("(((($3) * slices + ($2)) * height + ($1)) * $4 + ($0))", x, y, s, z,
-                              GetWidth());
-    }
-    case TensorStorageType::TEXTURE_2D:
-      return absl::Substitute("(int2)(($0) * depth + ($1), ($2) * slices + ($3))", x, z, y, s);
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return absl::Substitute("(int2)(($0) * depth + ($1), ($2))", x, z, y);
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return absl::Substitute("(int4)(($0), ($1), ($2) * slices + ($3), 0)", x, y, z, s);
-    case TensorStorageType::UNKNOWN:
-      return "error";
-  }
-  return "error";
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDSB(const std::string &x,
-                                                                 const std::string &y,
-                                                                 const std::string &z,
-                                                                 const std::string &s,
-                                                                 const std::string &b) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      return absl::Substitute("((((($4) * slices + ($3)) * height + $2) * width + ($1)) * batch + "
-                              "($0))",
-                              b, x, y, s, z);
-    case TensorStorageType::TEXTURE_2D:
-      return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3) * slices + ($4))",
-                              x, b, z, y, s);
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3))", x, b, z, y);
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3) * slices + ($4), 0)", x, b, y,
-                              z, s);
-    default:
-      throw std::runtime_error("Unknown storage type");
-  }
-}
-
-std::string TensorDescriptor::GetGlobalAddressNoDeclaration(const std::string &xc,
-                                                            const std::string &yc,
-                                                            const std::string &zc,
-                                                            const std::string &sc,
-                                                            const std::string &bc) const
-{
-  if (layout == Layout::HWC || (IsBatchedWidth() && layout == Layout::BHWC))
-  {
-    return GetGlobalAddressNoDeclarationWHS(xc, yc, sc);
-  }
-  else if (layout == Layout::BHWC)
-  {
-    return GetGlobalAddressNoDeclarationWHSB(xc, yc, sc, bc);
-  }
-  else if (layout == Layout::HWDC || (IsBatchedWidth() && layout == Layout::BHWDC))
-  {
-    return GetGlobalAddressNoDeclarationWHDS(xc, yc, zc, sc);
-  }
-  else if (layout == Layout::BHWDC)
-  {
-    return GetGlobalAddressNoDeclarationWHDSB(xc, yc, zc, sc, bc);
-  }
-  else
-  {
-    throw std::runtime_error("Unsupported layout");
-  }
-}
-
-absl::Status TensorDescriptor::GetDataTypeFromTemplateArgs(const std::string &template_arg,
-                                                           DataType *result) const
-{
-  std::string read_type = template_arg;
-  if (read_type == "FLT" || read_type == "ACCUM_FLT")
-  {
-    auto it = state_vars_.find(read_type);
-    if (it == state_vars_.end())
-    {
-      return absl::UnavailableError(
-        absl::StrCat("Read selector template argument ", read_type, " uninitialized."));
-    }
-    else
-    {
-      read_type = it->second;
-    }
-  }
-
-  if (read_type == "half")
-  {
-    *result = DataType::FLOAT16;
-  }
-  else if (read_type == "float")
-  {
-    *result = DataType::FLOAT32;
-  }
-  else
-  {
-    return absl::NotFoundError(
-      absl::StrCat("Unrecognized Read selector template argument - ", read_type));
-  }
-  return absl::OkStatus();
-}
-
-bool TensorDescriptor::HasAxis(Axis axis) const
-{
-  if (axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::CHANNELS)
-  {
-    return true;
-  }
-  if (axis == Axis::BATCH && (layout == Layout::BHWC || layout == Layout::BHWDC))
-  {
-    return true;
-  }
-  if (axis == Axis::DEPTH && (layout == Layout::HWDC || layout == Layout::BHWDC))
-  {
-    return true;
-  }
-  return false;
-}
-
-void TensorDescriptor::SetTextureAddressMode(TextureAddressMode mode)
-{
-  if (mode == TextureAddressMode::ZERO)
-  {
-    state_vars_["TextureMode"] = "ZERO";
-  }
-  else
-  {
-    state_vars_["TextureMode"] = "DONT_CARE";
-  }
-}
-
-bool TensorDescriptor::ParseCoordsFromArgs(const std::vector<std::string> &args, int offset,
-                                           std::string *xc, std::string *yc, std::string *zc,
-                                           std::string *sc, std::string *bc) const
-{
-  if (HasAxis(Axis::WIDTH))
-  {
-    if ((size_t)offset >= args.size())
-      return false;
-    *xc = args[offset++];
-  }
-  if (HasAxis(Axis::HEIGHT))
-  {
-    if ((size_t)offset >= args.size())
-      return false;
-    *yc = args[offset++];
-  }
-  if (HasAxis(Axis::DEPTH))
-  {
-    if ((size_t)offset >= args.size())
-      return false;
-    *zc = args[offset++];
-  }
-  if (HasAxis(Axis::CHANNELS))
-  {
-    if ((size_t)offset >= args.size())
-    {
-      auto it = state_vars_.find("slice_id");
-      if (it == state_vars_.end())
-      {
-        return false;
-      }
-      else
-      {
-        *sc = it->second;
-      }
-    }
-    else
-    {
-      *sc = args[offset++];
-    }
-  }
-  if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
-  {
-    if ((size_t)offset >= args.size())
-    {
-      auto it = state_vars_.find("batch_id");
-      if (it == state_vars_.end())
-      {
-        return false;
-      }
-      else
-      {
-        *bc = it->second;
-      }
-    }
-    else
-    {
-      *bc = args[offset++];
-    }
-  }
-  return true;
-}
-
-bool TensorDescriptor::IsBatchedWidth() const
-{
-  auto it = state_vars_.find("BatchedWidth");
-  return it != state_vars_.end() && it->second == "true";
-}
-
-std::string TensorDescriptor::GetWidth() const
-{
-  std::string div;
-  auto it1 = state_vars_.find("ElementsX2");
-  if (it1 != state_vars_.end() && it1->second == "true")
-  {
-    div = "_div2";
-  }
-  auto it2 = state_vars_.find("ElementsX4");
-  if (it2 != state_vars_.end() && it2->second == "true")
-  {
-    div = "_div4";
-  }
-  auto it = state_vars_.find("BatchedWidth");
-  if (it != state_vars_.end() && it->second == "true")
-  {
-    return "width_batched" + div;
-  }
-  else
-  {
-    return "width" + div;
-  }
-}
-
-std::string TensorDescriptor::GetSliceStride() const
-{
-  if (IsBatchedWidth())
-  {
-    return GetWidth() + " * height";
-  }
-  else
-  {
-    if (HasAxis(Axis::BATCH))
-    {
-      return GetWidth() + " * height * batch";
-    }
-    else
-    {
-      return GetWidth() + " * height";
-    }
-  }
-}
-
-TextureAddressMode TensorDescriptor::ModeFromState() const
-{
-  auto it = state_vars_.find("TextureMode");
-  if (it != state_vars_.end())
-  {
-    if (it->second == "ZERO")
-    {
-      return TextureAddressMode::ZERO;
-    }
-    else
-    {
-      return TextureAddressMode::DONT_CARE;
-    }
-  }
-  else
-  {
-    return TextureAddressMode::DONT_CARE;
-  }
-}
-
-void TensorDescriptor::UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src)
-{
-  shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);
-  UploadData(absl::MakeConstSpan(src.data));
-}
-
-void TensorDescriptor::UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src)
-{
-  shape = BHWDC(1, 1, 1, 1, src.shape.v);
-  UploadData(absl::MakeConstSpan(src.data));
-}
-
-void TensorDescriptor::UploadData(absl::Span<const float> src)
-{
-  int aligned_channels =
-    storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c : AlignByN(shape.c, 4);
-  int elements_count = shape.b * shape.w * shape.h * shape.d * aligned_channels;
-  data.resize(elements_count * SizeOf(data_type));
-  if (data_type == DataType::FLOAT32)
-  {
-    float *gpu_data = reinterpret_cast<float *>(data.data());
-    DataFromBHWDC(src, shape, *this, absl::MakeSpan(gpu_data, elements_count));
-  }
-}
-
-bool TensorDescriptor::SupportsZeroClamp(const Axis &axis) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::UNKNOWN:
-      return false;
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      return false;
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return axis == Axis::WIDTH || axis == Axis::HEIGHT;
-    case TensorStorageType::TEXTURE_3D:
-      return axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::DEPTH;
-  }
-  return false;
-}
-
-bool TensorDescriptor::CanReadOutOfBorder(const Axis &) const
-{
-  switch (storage_type)
-  {
-    case TensorStorageType::UNKNOWN:
-      return false;
-    case TensorStorageType::BUFFER:
-      return false;
-    case TensorStorageType::IMAGE_BUFFER:
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    case TensorStorageType::TEXTURE_ARRAY:
-      return true;
-  }
-  return false;
-}
-
-bool TensorDescriptor::IsLinear() const
-{
-  return storage_type == TensorStorageType::BUFFER ||
-         storage_type == TensorStorageType::IMAGE_BUFFER;
-}
-
-bool TensorDescriptor::ReturnsZeroForNegOneRead() const
-{
-  return storage_type == TensorStorageType::IMAGE_BUFFER;
-}
-
-namespace
-{
-int GetLinearIndex(const TensorDescriptor &desc, const BHWDC &shape, int b, int x, int y, int d,
-                   int s, int sub_c)
-{
-  const int slices = DivideRoundUp(shape.c, 4);
-  switch (desc.storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return ((((d * slices + s) * shape.h + y) * shape.w + x) * shape.b + b) * 4 +
-             sub_c; // DSHWBC4
-    case TensorStorageType::TEXTURE_2D:
-      return ((((y * slices + s) * shape.w + x) * shape.b + b) * shape.d + d) * 4 +
-             sub_c; // HSWBDC4
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return (((y * shape.w + x) * shape.b + b) * shape.d + d) * shape.c + sub_c; // HWBDC
-    default:
-      return -1;
-  }
-  return -1;
-}
-
-int GetChannelsAlignment(const TensorDescriptor &desc, const BHWDC &shape)
-{
-  return desc.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c : 4;
-}
-} // namespace
-
-template <typename T>
-void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc,
-                   absl::Span<T> dst)
-{
-  const int channels_alignment = GetChannelsAlignment(desc, shape);
-  const int slices = DivideRoundUp(shape.c, 4);
-  for (int b = 0; b < shape.b; ++b)
-  {
-    for (int s = 0; s < slices; ++s)
-    {
-      for (int y = 0; y < shape.h; ++y)
-      {
-        for (int x = 0; x < shape.w; ++x)
-        {
-          for (int d = 0; d < shape.d; ++d)
-          {
-            for (int c = 0; c < channels_alignment; ++c)
-            {
-              float value;
-              if (s * 4 + c < shape.c)
-              {
-                const int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c});
-                value = src[cpu_index];
-              }
-              else
-              {
-                value = 0.0f;
-              }
-              int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c);
-              dst[gpu_index] = value;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template void DataFromBHWDC<float>(absl::Span<const float> src, const BHWDC &shape,
-                                   const TensorDescriptor &desc, absl::Span<float> dst);
-
-template <typename T>
-void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc,
-                 absl::Span<float> dst)
-{
-  const int channels_alignment = GetChannelsAlignment(desc, shape);
-  const int slices = DivideRoundUp(shape.c, 4);
-  for (int b = 0; b < shape.b; ++b)
-  {
-    for (int s = 0; s < slices; ++s)
-    {
-      for (int y = 0; y < shape.h; ++y)
-      {
-        for (int x = 0; x < shape.w; ++x)
-        {
-          for (int d = 0; d < shape.d; ++d)
-          {
-            for (int c = 0; c < channels_alignment; ++c)
-            {
-              if (s * 4 + c >= shape.c)
-              {
-                continue;
-              }
-              int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c});
-              int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c);
-              dst[cpu_index] = src[gpu_index];
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template void DataToBHWDC<float>(absl::Span<const float> src, const BHWDC &shape,
-                                 const TensorDescriptor &desc, absl::Span<float> dst);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h b/runtime/onert/backend/gpu_cl/open_cl/TensorType.h
deleted file mode 100644 (file)
index 4552378..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__
-
-#include <cstddef>
-#include <string>
-
-#include "absl/types/span.h"
-#include "GpuObject.h"
-#include "DataType.h"
-#include "InternalTensor.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class TextureAddressMode
-{
-  DONT_CARE, // translated to CLK_ADDRESS_NONE
-  ZERO,      // translated to CLK_ADDRESS_CLAMP
-};
-
-std::string TextureAddressModeToString(TextureAddressMode address_mode);
-
-enum class TensorStorageType
-{
-  UNKNOWN,
-  BUFFER,
-  IMAGE_BUFFER,
-  TEXTURE_2D,
-  TEXTURE_3D,
-  TEXTURE_ARRAY,
-  SINGLE_TEXTURE_2D
-};
-
-struct TensorDescriptor : public GPUObjectDescriptor
-{
-  TensorDescriptor() = default;
-  TensorDescriptor(DataType dt, TensorStorageType st, Layout l)
-    : data_type(dt), storage_type(st), layout(l)
-  {
-  }
-
-  TensorDescriptor(const TensorDescriptor &) = default;
-  TensorDescriptor &operator=(const TensorDescriptor &) = default;
-  TensorDescriptor(TensorDescriptor &&desc);
-  TensorDescriptor &operator=(TensorDescriptor &&desc);
-
-  bool operator==(const TensorDescriptor &d) const
-  {
-    return data_type == d.data_type && storage_type == d.storage_type && layout == d.layout;
-  }
-
-  bool operator!=(const TensorDescriptor &d) const { return !(*this == d); }
-
-  absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
-                               const std::vector<std::string> &template_args,
-                               std::string *result) const override;
-
-  GPUResources GetGPUResources() const override;
-
-  absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
-  void Release() override { data.clear(); }
-
-  bool HasAxis(Axis axis) const;
-  void SetTextureAddressMode(TextureAddressMode mode);
-
-  absl::Status GetLinkingContextFromWriteSelector(const std::vector<std::string> &args,
-                                                  std::string *value_name, std::string *x_coord,
-                                                  std::string *y_coord, std::string *s_coord) const;
-
-  void UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src);
-  void UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src);
-
-  bool SupportsZeroClamp(const Axis &axis) const;
-  bool CanReadOutOfBorder(const Axis &axis) const;
-  bool IsLinear() const;
-
-  // applicable only for types that: IsLinear -> true.
-  // In this case for address we have 1d component - addr (int)
-  // If for addr == -1 this linear storage type returns FLT4(0.0), this function
-  // returns true, otherwise false
-  bool ReturnsZeroForNegOneRead() const;
-
-  DataType data_type = DataType::UNKNOWN;
-  TensorStorageType storage_type = TensorStorageType::UNKNOWN;
-  // This field describes logical layout, actual(physical) GPU layout can be
-  // totally different.
-  Layout layout = Layout::UNKNOWN; // Supported layouts is HWC, BHWC, HWDC, BHWDC
-
-  // optional
-  BHWDC shape;
-  std::vector<uint8_t> data;
-
-private:
-  absl::Status PerformReadSelector(const std::vector<std::string> &args,
-                                   const std::vector<std::string> &template_args,
-                                   std::string *result) const;
-
-  absl::Status PerformGetAddressSelector(const std::vector<std::string> &args,
-                                         std::string *result) const;
-
-  absl::Status PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args,
-                                                    std::string *result) const;
-
-  absl::Status PerformGetWHOffsetSelector(const std::vector<std::string> &args,
-                                          std::string *result) const;
-
-  absl::Status PerformGetHandleSelector(const std::vector<std::string> &args,
-                                        std::string *result) const;
-
-  std::string DeclareAddress(const std::string &var_name, const std::string &address) const;
-
-  std::string StorageTypeToAddressType() const;
-
-  absl::Status PerformWriteSelector(const std::vector<std::string> &args,
-                                    std::string *result) const;
-
-  absl::Status PerformWriteLinearSelector(const std::vector<std::string> &args,
-                                          std::string *result) const;
-
-  std::string Read(DataType read_as_type, const std::string &global_address) const;
-  std::string Write(const std::string &var_name, const std::string &global_address) const;
-
-  bool IsBatchedWidth() const;
-
-  std::string GetWidth() const;
-  std::string GetSliceStride() const;
-
-  TextureAddressMode ModeFromState() const;
-
-  absl::Status GetDataTypeFromTemplateArgs(const std::string &template_arg, DataType *result) const;
-
-  std::string GetGlobalAddressNoDeclarationWHS(const std::string &x, const std::string &y,
-                                               const std::string &s) const;
-  std::string GetGlobalAddressNoDeclarationWHSB(const std::string &x, const std::string &y,
-                                                const std::string &s, const std::string &b) const;
-  std::string GetGlobalAddressNoDeclarationWHDS(const std::string &x, const std::string &y,
-                                                const std::string &z, const std::string &s) const;
-  std::string GetGlobalAddressNoDeclarationWHDSB(const std::string &x, const std::string &y,
-                                                 const std::string &z, const std::string &s,
-                                                 const std::string &b) const;
-  std::string GetGlobalAddressNoDeclaration(const std::string &xc, const std::string &yc,
-                                            const std::string &zc, const std::string &sc,
-                                            const std::string &bc) const;
-
-  bool ParseCoordsFromArgs(const std::vector<std::string> &args, int offset, std::string *xc,
-                           std::string *yc, std::string *zc, std::string *sc,
-                           std::string *bc) const;
-
-  void UploadData(absl::Span<const float> src);
-};
-
-template <typename T>
-void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc,
-                   absl::Span<T> dst);
-
-template <typename T>
-void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc,
-                 absl::Span<float> dst);
-
-std::string ToString(TensorStorageType type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc
deleted file mode 100644 (file)
index b1f8309..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "TensorTypeUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ObjectType ToObjectType(TensorStorageType type)
-{
-  switch (type)
-  {
-    case TensorStorageType::IMAGE_BUFFER:
-    case TensorStorageType::BUFFER:
-      return ObjectType::OPENCL_BUFFER;
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_ARRAY:
-    case TensorStorageType::TEXTURE_3D:
-      return ObjectType::OPENCL_TEXTURE;
-    default:
-      return ObjectType::UNKNOWN;
-  }
-}
-
-DataLayout ToDataLayout(TensorStorageType type)
-{
-  switch (type)
-  {
-    case TensorStorageType::BUFFER:
-      return DataLayout::DHWC4;
-    case TensorStorageType::IMAGE_BUFFER:
-      return DataLayout::DHWC4;
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      return DataLayout::BHWC;
-    case TensorStorageType::TEXTURE_2D:
-      return DataLayout::HDWC4;
-    case TensorStorageType::TEXTURE_ARRAY:
-      return DataLayout::DHWC4;
-    case TensorStorageType::TEXTURE_3D:
-      return DataLayout::DHWC4;
-    default:
-      return DataLayout::UNKNOWN;
-  }
-}
-
-TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout)
-{
-  switch (object_type)
-  {
-    case ObjectType::OPENCL_BUFFER:
-      return TensorStorageType::BUFFER;
-    case ObjectType::OPENCL_TEXTURE:
-      switch (data_layout)
-      {
-        case DataLayout::BHWC:
-          return TensorStorageType::SINGLE_TEXTURE_2D;
-        case DataLayout::DHWC4:
-          return TensorStorageType::TEXTURE_ARRAY;
-        case DataLayout::HDWC4:
-          return TensorStorageType::TEXTURE_2D;
-        default:
-          return TensorStorageType::UNKNOWN;
-      }
-    default:
-      return TensorStorageType::UNKNOWN;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h
deleted file mode 100644 (file)
index f56fc3d..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__
-
-#include "Api.h"
-#include "TensorType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ObjectType ToObjectType(TensorStorageType type);
-
-DataLayout ToDataLayout(TensorStorageType type);
-
-TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc
deleted file mode 100644 (file)
index ae25e85..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Texture2d.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-// Creates new 4-channel 2D texture with cl_channel_type elements
-absl::Status CreateTexture2D(int width, int height, DataType type, void *data, CLContext *context,
-                             Texture2D *result)
-{
-  cl_mem texture;
-  cl_channel_type channel_type = DataTypeToChannelType(type);
-  RETURN_IF_ERROR(
-    CreateRGBAImage2D(context->context(), width, height, channel_type, data, &texture));
-  *result = Texture2D(texture, width, height, channel_type);
-
-  return absl::OkStatus();
-}
-} // namespace
-
-Texture2DDescriptor::Texture2DDescriptor(Texture2DDescriptor &&desc)
-  : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type),
-    normalized(desc.normalized), normalized_type(desc.normalized_type), size(desc.size),
-    data(std::move(desc.data))
-{
-}
-
-Texture2DDescriptor &Texture2DDescriptor::operator=(Texture2DDescriptor &&desc)
-{
-  if (this != &desc)
-  {
-    std::swap(element_type, desc.element_type);
-    std::swap(normalized, desc.normalized);
-    std::swap(normalized_type, desc.normalized_type);
-    std::swap(size, desc.size);
-    data = std::move(desc.data);
-    GPUObjectDescriptor::operator=(std::move(desc));
-  }
-  return *this;
-}
-
-void Texture2DDescriptor::Release() { data.clear(); }
-
-GPUResources Texture2DDescriptor::GetGPUResources() const
-{
-  GPUResources resources;
-  GPUImage2DDescriptor desc;
-  desc.data_type = element_type;
-  desc.access_type = access_type_;
-  resources.images2d.push_back({"tex2d", desc});
-  return resources;
-}
-
-absl::Status Texture2DDescriptor::PerformSelector(const std::string &selector,
-                                                  const std::vector<std::string> &args,
-                                                  const std::vector<std::string> &,
-                                                  std::string *result) const
-{
-  if (selector == "Read")
-  {
-    return PerformReadSelector(args, result);
-  }
-  else
-  {
-    return absl::NotFoundError(
-      absl::StrCat("Texture2DDescriptor don't have selector with name - ", selector));
-  }
-}
-
-absl::Status Texture2DDescriptor::PerformReadSelector(const std::vector<std::string> &args,
-                                                      std::string *result) const
-{
-  if (args.size() != 2)
-  {
-    return absl::NotFoundError(absl::StrCat("Texture2DDescriptor Read require two arguments, but ",
-                                            args.size(), " was passed"));
-  }
-  std::string read;
-  switch (element_type)
-  {
-    case DataType::FLOAT32:
-      read = "read_imagef";
-      break;
-    case DataType::FLOAT16:
-      read = "read_imageh";
-      break;
-    case DataType::INT8:
-    case DataType::INT16:
-    case DataType::INT32:
-      if (normalized)
-      {
-        read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-      }
-      else
-      {
-        read = "read_imagei";
-      }
-      break;
-    case DataType::UINT8:
-    case DataType::UINT16:
-    case DataType::UINT32:
-      if (normalized)
-      {
-        read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-      }
-      else
-      {
-        read = "read_imageui";
-      }
-      break;
-    default:
-      read = "unknown_type";
-      break;
-  }
-  *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", " + args[1] + "))");
-  return absl::OkStatus();
-}
-
-absl::Status Texture2DDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
-  Texture2D gpu_texture;
-  RETURN_IF_ERROR(gpu_texture.CreateFromTexture2DDescriptor(*this, context));
-  *result = absl::make_unique<Texture2D>(std::move(gpu_texture));
-  return absl::OkStatus();
-}
-
-Texture2D::Texture2D(cl_mem texture, int width, int height, cl_channel_type type)
-  : texture_(texture), width_(width), height_(height), channel_type_(type)
-{
-}
-
-Texture2D::Texture2D(Texture2D &&texture)
-  : texture_(texture.texture_), width_(texture.width_), height_(texture.height_),
-    channel_type_(texture.channel_type_)
-{
-  texture.texture_ = nullptr;
-  texture.width_ = 0;
-  texture.height_ = 0;
-}
-
-Texture2D &Texture2D::operator=(Texture2D &&texture)
-{
-  if (this != &texture)
-  {
-    Release();
-    std::swap(channel_type_, texture.channel_type_);
-    std::swap(width_, texture.width_);
-    std::swap(height_, texture.height_);
-    std::swap(texture_, texture.texture_);
-  }
-  return *this;
-}
-
-void Texture2D::Release()
-{
-  if (texture_)
-  {
-    clReleaseMemObject(texture_);
-    texture_ = nullptr;
-    width_ = 0;
-    height_ = 0;
-  }
-}
-
-absl::Status Texture2D::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                        GPUResourcesWithValue *resources) const
-{
-  const auto *texture_desc = dynamic_cast<const Texture2DDescriptor *>(obj_ptr);
-  if (!texture_desc)
-  {
-    return absl::InvalidArgumentError("Expected Texture2DDescriptor on input.");
-  }
-
-  resources->images2d.push_back({"tex2d", texture_});
-  return absl::OkStatus();
-}
-
-absl::Status Texture2D::CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc,
-                                                      CLContext *context)
-{
-  width_ = desc.size.x;
-  height_ = desc.size.y;
-  channel_type_ = DataTypeToChannelType(desc.element_type, desc.normalized);
-  uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
-  return CreateRGBAImage2D(context->context(), desc.size.x, desc.size.y, channel_type_, data_ptr,
-                           &texture_);
-}
-
-// Creates new 4-channel 2D texture with f32 elements
-absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result)
-{
-  return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context, result);
-}
-
-// Creates new 4-channel 2D texture with f16 elements
-absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result)
-{
-  return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context, result);
-}
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context,
-                                 Texture2D *result)
-{
-  return CreateTexture2D(width, height, type, nullptr, context, result);
-}
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data,
-                                 CLContext *context, Texture2D *result)
-{
-  return CreateTexture2D(width, height, type, data, context, result);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h
deleted file mode 100644 (file)
index 2645070..0000000
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "TensorType.h"
-#include "Util.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct Texture2DDescriptor : public GPUObjectDescriptor
-{
-  DataType element_type;
-  bool normalized = false;  // used with INT data types, if normalized, we read
-                            // in kernel float data.
-  DataType normalized_type; // can be FLOAT32 or FLOAT16, using with normalized
-                            // = true
-
-  // optional
-  int2 size = int2(0, 0);
-  std::vector<uint8_t> data;
-
-  Texture2DDescriptor() = default;
-  Texture2DDescriptor(const Texture2DDescriptor &) = default;
-  Texture2DDescriptor &operator=(const Texture2DDescriptor &) = default;
-  Texture2DDescriptor(Texture2DDescriptor &&desc);
-  Texture2DDescriptor &operator=(Texture2DDescriptor &&desc);
-
-  absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
-                               const std::vector<std::string> &template_args,
-                               std::string *result) const override;
-
-  GPUResources GetGPUResources() const override;
-  absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
-
-  absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
-  void Release() override;
-};
-
-// Texture2D represent formatted GPU data storage.
-// Texture2D is moveable but not copyable.
-class Texture2D : public GPUObject
-{
-public:
-  Texture2D() {} // just for using Texture2D as a class members
-  Texture2D(cl_mem texture, int width, int height, cl_channel_type type);
-
-  // Move only
-  Texture2D(Texture2D &&texture);
-  Texture2D &operator=(Texture2D &&texture);
-  Texture2D(const Texture2D &) = delete;
-  Texture2D &operator=(const Texture2D &) = delete;
-
-  virtual ~Texture2D() { Release(); }
-
-  cl_mem GetMemoryPtr() const { return texture_; }
-
-  // Writes data to a texture. Data should point to a region that
-  // has exact width * height * sizeof(pixel) bytes.
-  template <typename T> absl::Status WriteData(CLCommandQueue *queue, const absl::Span<T> data);
-
-  // Reads data from Texture2D into CPU memory.
-  template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const;
-
-  absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                               GPUResourcesWithValue *resources) const override;
-
-  absl::Status CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc, CLContext *context);
-
-private:
-  void Release();
-
-  cl_mem texture_ = nullptr;
-  int width_;
-  int height_;
-  cl_channel_type channel_type_;
-};
-
-using Texture2DPtr = std::shared_ptr<Texture2D>;
-
-// Creates new 4-channel 2D texture with f32 elements
-absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result);
-
-// Creates new 4-channel 2D texture with f16 elements
-absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result);
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context,
-                                 Texture2D *result);
-
-absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data,
-                                 CLContext *context, Texture2D *result);
-
-template <typename T>
-absl::Status Texture2D::WriteData(CLCommandQueue *queue, const absl::Span<T> data)
-{
-  const int element_size = ChannelTypeToSizeInBytes(channel_type_);
-  if (sizeof(T) % element_size != 0)
-  {
-    return absl::InvalidArgumentError(
-      "Template type T has not suitable element type for created texture.");
-  }
-  if (4 * width_ * height_ * element_size != data.size() * sizeof(T))
-  {
-    return absl::InvalidArgumentError(
-      "absl::Span<T> data size is different from texture allocated size.");
-  }
-
-  RETURN_IF_ERROR(queue->EnqueueWriteImage(texture_, int3(width_, height_, 1), data.data()));
-
-  return absl::OkStatus();
-}
-
-template <typename T>
-absl::Status Texture2D::ReadData(CLCommandQueue *queue, std::vector<T> *result) const
-{
-  const int element_size = ChannelTypeToSizeInBytes(channel_type_);
-  if (sizeof(T) != element_size)
-  {
-    return absl::InvalidArgumentError("Pixel format is different.");
-  }
-
-  const int elements_count = width_ * height_ * 4;
-  result->resize(elements_count);
-
-  return queue->EnqueueReadImage(texture_, int3(width_, height_, 1), result->data());
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Types.h b/runtime/onert/backend/gpu_cl/open_cl/Types.h
deleted file mode 100644 (file)
index f3cf334..0000000
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__
-
-#include <array>
-#include <cstddef>
-#include <cstdint>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// TODO(akulik): make these types Google-style compliant.
-
-template <typename T> struct alignas(sizeof(T)) Vec4
-{
-  union {
-    struct
-    {
-      T x, y, z, w;
-    };
-    std::array<T, 4> data_;
-  };
-
-  Vec4() : Vec4(T(0.0f)) {}
-
-  template <typename S> Vec4(S x_, S y_, S z_, S w_) : x(x_), y(y_), z(z_), w(w_) {}
-  explicit Vec4(T v) : x(v), y(v), z(v), w(v) {}
-
-  template <typename S> explicit Vec4(S v) : x(v), y(v), z(v), w(v) {}
-
-  Vec4(const Vec4 &f) : x(f.x), y(f.y), z(f.z), w(f.w) {}
-
-  template <typename S> Vec4(const Vec4<S> &f) : x(f.x), y(f.y), z(f.z), w(f.w) {}
-
-  Vec4 &operator=(const Vec4 &other)
-  {
-    x = other.x;
-    y = other.y;
-    z = other.z;
-    w = other.w;
-    return *this;
-  }
-
-  static constexpr int size() { return 4; }
-
-  T &operator[](size_t n) { return data_[n]; }
-  T operator[](size_t n) const { return data_[n]; }
-
-  bool operator==(const Vec4 &value) const
-  {
-    return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2] &&
-           data_[3] == value[3];
-  }
-  bool operator!=(const Vec4 &value) const { return !(this->operator==(value)); }
-};
-
-template <typename T> struct alignas(sizeof(T)) Vec3
-{
-  union {
-    struct
-    {
-      T x, y, z;
-    };
-    std::array<T, 3> data_;
-  };
-
-  Vec3() : Vec3(T(0.0f)) {}
-
-  template <typename S> constexpr Vec3(S x_, S y_, S z_) : x(x_), y(y_), z(z_) {}
-  explicit Vec3(T v) : x(v), y(v), z(v) {}
-
-  template <typename S> explicit Vec3(S v) : x(v), y(v), z(v) {}
-
-  Vec3(const Vec3 &f) : x(f.x), y(f.y), z(f.z) {}
-
-  template <typename S> Vec3(const Vec3<S> &f) : x(f.x), y(f.y), z(f.z) {}
-
-  Vec3 &operator=(const Vec3 &other)
-  {
-    x = other.x;
-    y = other.y;
-    z = other.z;
-    return *this;
-  }
-
-  static constexpr int size() { return 3; }
-
-  T &operator[](size_t n) { return data_[n]; }
-  T operator[](size_t n) const { return data_[n]; }
-  bool operator==(const Vec3 &value) const
-  {
-    return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2];
-  }
-  bool operator!=(const Vec3 &value) const { return !(this->operator==(value)); }
-};
-
-template <typename T> struct alignas(sizeof(T)) Vec2
-{
-  union {
-    struct
-    {
-      T x, y;
-    };
-    std::array<T, 2> data_;
-  };
-
-  Vec2() : Vec2(T(0.0f)) {}
-
-  template <typename S> Vec2(S x_, S y_) : x(x_), y(y_) {}
-  explicit Vec2(T v) : x(v), y(v) {}
-
-  template <typename S> explicit Vec2(S v) : x(v), y(v) {}
-
-  Vec2(const Vec2 &f) : x(f.x), y(f.y) {}
-
-  template <typename S> Vec2(const Vec2<S> &f) : x(f.x), y(f.y) {}
-
-  Vec2 &operator=(const Vec2 &other)
-  {
-    x = other.x;
-    y = other.y;
-    return *this;
-  }
-
-  bool operator==(const Vec2 &value) const { return data_[0] == value[0] && data_[1] == value[1]; }
-
-  bool operator!=(const Vec2 &value) const { return !(this->operator==(value)); }
-
-  static constexpr int size() { return 2; }
-
-  T &operator[](size_t n) { return data_[n]; }
-  T operator[](size_t n) const { return data_[n]; }
-};
-
-using float2 = Vec2<float>;
-using byte2 = Vec2<int8_t>;
-using ubyte2 = Vec2<uint8_t>;
-using short2 = Vec2<int16_t>;
-using ushort2 = Vec2<uint16_t>;
-using int2 = Vec2<int32_t>;
-using uint2 = Vec2<uint32_t>;
-
-using float3 = Vec3<float>;
-using byte3 = Vec3<int8_t>;
-using ubyte3 = Vec3<uint8_t>;
-using short3 = Vec3<int16_t>;
-using ushort3 = Vec3<uint16_t>;
-using int3 = Vec3<int32_t>;
-using uint3 = Vec3<uint32_t>;
-
-using float4 = Vec4<float>;
-using byte4 = Vec4<int8_t>;
-using ubyte4 = Vec4<uint8_t>;
-using short4 = Vec4<int16_t>;
-using ushort4 = Vec4<uint16_t>;
-using int4 = Vec4<int32_t>;
-using uint4 = Vec4<uint32_t>;
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/Util.cc
deleted file mode 100644 (file)
index 9f5a838..0000000
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Util.h"
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/substitute.h"
-#include "Status.h"
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string CLErrorCodeToString(cl_int error_code)
-{
-  switch (error_code)
-  {
-    case CL_SUCCESS:
-      return "Success";
-    case CL_DEVICE_NOT_FOUND:
-      return "Device not found";
-    case CL_DEVICE_NOT_AVAILABLE:
-      return "Device not available";
-    case CL_COMPILER_NOT_AVAILABLE:
-      return "Compiler not available";
-    case CL_MEM_OBJECT_ALLOCATION_FAILURE:
-      return "Memory object allocation failure";
-    case CL_OUT_OF_RESOURCES:
-      return "Out of resources";
-    case CL_OUT_OF_HOST_MEMORY:
-      return "Out of host memory";
-    case CL_PROFILING_INFO_NOT_AVAILABLE:
-      return "Profiling information not available";
-    case CL_MEM_COPY_OVERLAP:
-      return "Memory copy overlap";
-    case CL_IMAGE_FORMAT_MISMATCH:
-      return "Image format mismatch";
-    case CL_IMAGE_FORMAT_NOT_SUPPORTED:
-      return "Image format not supported";
-    case CL_BUILD_PROGRAM_FAILURE:
-      return "Build program failure";
-    case CL_MAP_FAILURE:
-      return "Mapping failure";
-    case CL_MISALIGNED_SUB_BUFFER_OFFSET:
-      return "Misaligned sub-buffer offset";
-    case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
-      return "Execution status error for events in wait list";
-    case CL_COMPILE_PROGRAM_FAILURE:
-      return "Compile program failure";
-    case CL_LINKER_NOT_AVAILABLE:
-      return "Linker not available";
-    case CL_LINK_PROGRAM_FAILURE:
-      return "Link program failure";
-    case CL_DEVICE_PARTITION_FAILED:
-      return "Device partition failed";
-    case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
-      return "Kernel argument information not available";
-
-    case CL_INVALID_VALUE:
-      return "Invalid value";
-    case CL_INVALID_DEVICE_TYPE:
-      return "Invalid device type";
-    case CL_INVALID_PLATFORM:
-      return "Invalid platform";
-    case CL_INVALID_DEVICE:
-      return "Invalid device";
-    case CL_INVALID_CONTEXT:
-      return "Invalid context";
-    case CL_INVALID_QUEUE_PROPERTIES:
-      return "Invalid queue properties";
-    case CL_INVALID_COMMAND_QUEUE:
-      return "Invalid command queue";
-    case CL_INVALID_HOST_PTR:
-      return "Invalid host pointer";
-    case CL_INVALID_MEM_OBJECT:
-      return "Invalid memory object";
-    case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
-      return "Invalid image format descriptor";
-    case CL_INVALID_IMAGE_SIZE:
-      return "Invalid image size";
-    case CL_INVALID_SAMPLER:
-      return "Invalid sampler";
-    case CL_INVALID_BINARY:
-      return "Invalid binary";
-    case CL_INVALID_BUILD_OPTIONS:
-      return "Invalid build options";
-    case CL_INVALID_PROGRAM:
-      return "Invalid program";
-    case CL_INVALID_PROGRAM_EXECUTABLE:
-      return "Invalid program executable";
-    case CL_INVALID_KERNEL_NAME:
-      return "Invalid kernel name";
-    case CL_INVALID_KERNEL_DEFINITION:
-      return "Invalid kernel definition";
-    case CL_INVALID_KERNEL:
-      return "Invalid kernel";
-    case CL_INVALID_ARG_INDEX:
-      return "Invalid argument index";
-    case CL_INVALID_ARG_VALUE:
-      return "Invalid argument value";
-    case CL_INVALID_ARG_SIZE:
-      return "Invalid argument size";
-    case CL_INVALID_KERNEL_ARGS:
-      return "Invalid kernel arguments";
-    case CL_INVALID_WORK_DIMENSION:
-      return "Invalid work dimension";
-    case CL_INVALID_WORK_GROUP_SIZE:
-      return "Invalid work group size";
-    case CL_INVALID_WORK_ITEM_SIZE:
-      return "Invalid work item size";
-    case CL_INVALID_GLOBAL_OFFSET:
-      return "Invalid global offset";
-    case CL_INVALID_EVENT_WAIT_LIST:
-      return "Invalid event wait list";
-    case CL_INVALID_EVENT:
-      return "Invalid event";
-    case CL_INVALID_OPERATION:
-      return "Invalid operation";
-    case CL_INVALID_GL_OBJECT:
-      return "Invalid GL object";
-    case CL_INVALID_BUFFER_SIZE:
-      return "Invalid buffer size";
-    case CL_INVALID_MIP_LEVEL:
-      return "Invalid mip-level";
-    case CL_INVALID_GLOBAL_WORK_SIZE:
-      return "Invalid global work size";
-    case CL_INVALID_PROPERTY:
-      return "Invalid property";
-    case CL_INVALID_IMAGE_DESCRIPTOR:
-      return "Invalid image descriptor";
-    case CL_INVALID_COMPILER_OPTIONS:
-      return "Invalid compiler options";
-    case CL_INVALID_LINKER_OPTIONS:
-      return "Invalid linker options";
-    case CL_INVALID_DEVICE_PARTITION_COUNT:
-      return "Invalid device partition count";
-    case CL_INVALID_PIPE_SIZE:
-      return "Invalid pipe size";
-    case CL_INVALID_DEVICE_QUEUE:
-      return "Invalid device queue";
-    case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
-      return "Invalid GL sharegroup reference KHR";
-
-    default:
-      return "Unknown OpenCL";
-  }
-}
-
-int ChannelTypeToSizeInBytes(cl_channel_type type)
-{
-  switch (type)
-  {
-    case CL_FLOAT:
-      return 4;
-    default:
-      return 0;
-  }
-}
-
-absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data,
-                            cl_mem *result)
-{
-  cl_mem_flags flags = read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
-  if (data)
-  {
-    flags |= CL_MEM_COPY_HOST_PTR;
-  }
-  cl_int error_code;
-  *result = clCreateBuffer(context, flags, size_in_bytes, data, &error_code);
-  if (!*result)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-cl_channel_type DataTypeToChannelType(DataType type, bool normalized)
-{
-  switch (type)
-  {
-    case DataType::FLOAT32:
-      return CL_FLOAT;
-    case DataType::INT8:
-      return normalized ? CL_SNORM_INT8 : CL_SIGNED_INT8;
-    case DataType::UINT8:
-      return normalized ? CL_UNORM_INT8 : CL_UNSIGNED_INT8;
-    case DataType::INT16:
-      return normalized ? CL_SNORM_INT16 : CL_SIGNED_INT16;
-    case DataType::UINT16:
-      return normalized ? CL_UNORM_INT16 : CL_UNSIGNED_INT16;
-    case DataType::INT32:
-      return CL_SIGNED_INT32;
-    case DataType::UINT32:
-      return CL_UNSIGNED_INT32;
-    default:
-      return CL_FLOAT;
-  }
-}
-
-absl::Status CreateRGBAImage2D(cl_context context, int width, int height,
-                               cl_channel_type channel_type, void *data, cl_mem *result)
-{
-  cl_image_desc desc;
-  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
-  desc.image_width = width;
-  desc.image_height = height;
-  desc.image_depth = 0;
-  desc.image_row_pitch = 0;
-  desc.image_slice_pitch = 0;
-  desc.num_mip_levels = 0;
-  desc.num_samples = 0;
-  desc.buffer = nullptr;
-
-  cl_image_format format;
-  format.image_channel_order = CL_RGBA;
-  format.image_channel_data_type = channel_type;
-
-  cl_mem_flags flags = CL_MEM_READ_WRITE;
-  if (data)
-  {
-    flags |= CL_MEM_COPY_HOST_PTR;
-  }
-
-  cl_int error_code;
-  *result = CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code);
-  if (error_code != CL_SUCCESS)
-  {
-    return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ",
-                                           CLErrorCodeToString(error_code)));
-  }
-  return absl::OkStatus();
-}
-
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
-                                const std::string &stride_x, const std::string &padding_x)
-{
-  // TODO(sorokin) check perf and optimize with floor() if needed
-  // int p0 = src_x / batch_size;\n";
-  // int b0 = src_x % batch_size;\n";
-  // return p0 * stride_x * batch_size + b0 + padding_x;\n";
-  return absl::Substitute("((($0) / $1) * $2 * $1 + (($0) % $1) + $3)", src_x, batch_size, stride_x,
-                          padding_x);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.h b/runtime/onert/backend/gpu_cl/open_cl/Util.h
deleted file mode 100644 (file)
index 996c564..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
-
-#include <string>
-
-#include "absl/types/span.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "InternalTensor.h"
-#include "Status.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts
-// with B after W (for example HWBC4) and WB stored in one axis of GPU
-// resources.
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
-                                const std::string &stride_x, const std::string &padding_x);
-
-// @param n must be non negative
-// @param divisor must be greater than zero
-template <typename T, typename N> T DivideRoundUp(T n, N divisor)
-{
-  const T div = static_cast<T>(divisor);
-  const T q = n / div;
-  return n % div == 0 ? q : q + 1;
-}
-
-template <> inline uint3 DivideRoundUp(uint3 n, uint3 divisor)
-{
-  return uint3(DivideRoundUp(n.x, divisor.x), DivideRoundUp(n.y, divisor.y),
-               DivideRoundUp(n.z, divisor.z));
-}
-
-// @param number or its components must be greater than zero
-// @param n must be greater than zero
-template <typename T, typename N> T AlignByN(T number, N n) { return DivideRoundUp(number, n) * n; }
-
-std::string CLErrorCodeToString(cl_int error_code);
-
-int ChannelTypeToSizeInBytes(cl_channel_type type);
-
-template <DataType S, typename T>
-void CopyLinearFLT4(const InternalTensor<Linear, S> &src, absl::Span<T> dst)
-{
-  const int dst_depth = dst.size();
-  for (int d = 0; d < dst_depth; ++d)
-  {
-    T val;
-    for (int i = 0; i < 4; ++i)
-    {
-      const int dst_ch = d * 4 + i;
-      val[i] = dst_ch >= src.shape.v ? 0.0f : src.data[dst_ch];
-    }
-    dst[d] = val;
-  }
-}
-
-absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data,
-                            cl_mem *result);
-
-cl_channel_type DataTypeToChannelType(DataType type, bool normalized = false);
-absl::Status CreateRGBAImage2D(cl_context context, int width, int height,
-                               cl_channel_type channel_type, void *data, cl_mem *result);
-
-template <DataType S, typename T>
-void RearrangeWeightsToOHWIOGroupI4O4(const InternalTensor<OHWI, S> &weights, int out_group_size,
-                                      absl::Span<T> dst)
-{
-  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
-  const int src_slices = DivideRoundUp(weights.shape.i, 4);
-  const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
-  int counter = 0;
-  for (int d = 0; d < dst_groups; ++d)
-  {
-    for (int y = 0; y < weights.shape.h; ++y)
-    {
-      for (int x = 0; x < weights.shape.w; ++x)
-      {
-        for (int s = 0; s < src_slices; ++s)
-        {
-          for (int d_group = 0; d_group < out_group_size; ++d_group)
-          {
-            for (int j = 0; j < 4; ++j)
-            {
-              T filter;
-              for (int i = 0; i < 4; ++i)
-              {
-                const int s_ch = s * 4 + j;
-                const int d_ch = (d * out_group_size + d_group) * 4 + i;
-                if (s_ch < weights.shape.i && d_ch < weights.shape.o)
-                {
-                  const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch});
-                  filter[i] = weights.data[f_index];
-                }
-                else
-                {
-                  filter[i] = 0.0f;
-                }
-              }
-              dst[counter++] = filter;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsToODHWIOGroupI4O4(const InternalTensor<OHWDI, S> &weights, int out_group_size,
-                                       absl::Span<T> dst)
-{
-  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
-  const int src_slices = DivideRoundUp(weights.shape.i, 4);
-  const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
-  int counter = 0;
-  for (int d = 0; d < dst_groups; ++d)
-  {
-    for (int z = 0; z < weights.shape.d; ++z)
-    {
-      for (int y = 0; y < weights.shape.h; ++y)
-      {
-        for (int x = 0; x < weights.shape.w; ++x)
-        {
-          for (int s = 0; s < src_slices; ++s)
-          {
-            for (int d_group = 0; d_group < out_group_size; ++d_group)
-            {
-              for (int j = 0; j < 4; ++j)
-              {
-                T filter;
-                for (int i = 0; i < 4; ++i)
-                {
-                  const int s_ch = s * 4 + j;
-                  const int d_ch = (d * out_group_size + d_group) * 4 + i;
-                  if (s_ch < weights.shape.i && d_ch < weights.shape.o)
-                  {
-                    const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
-                    filter[i] = weights.data[f_index];
-                  }
-                  else
-                  {
-                    filter[i] = 0.0f;
-                  }
-                }
-                dst[counter++] = filter;
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsToI4HWIOOGroupO4(const InternalTensor<OHWI, S> &weights, int out_group_size,
-                                      absl::Span<T> dst)
-{
-  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
-  const int src_slices = DivideRoundUp(weights.shape.i, 4);
-  const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
-  int counter = 0;
-  for (int j = 0; j < 4; ++j)
-  {
-    for (int y = 0; y < weights.shape.h; ++y)
-    {
-      for (int x = 0; x < weights.shape.w; ++x)
-      {
-        for (int s = 0; s < src_slices; ++s)
-        {
-          for (int d = 0; d < dst_groups; ++d)
-          {
-            for (int d_group = 0; d_group < out_group_size; ++d_group)
-            {
-              T filter;
-              for (int i = 0; i < 4; ++i)
-              {
-                const int s_ch = s * 4 + j;
-                const int d_ch = (d * out_group_size + d_group) * 4 + i;
-                if (s_ch < weights.shape.i && d_ch < weights.shape.o)
-                {
-                  const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch});
-                  filter[i] = weights.data[f_index];
-                }
-                else
-                {
-                  filter[i] = 0.0f;
-                }
-              }
-              dst[counter++] = filter;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsToI4DHWIOOGroupO4(const InternalTensor<OHWDI, S> &weights, int out_group_size,
-                                       absl::Span<T> dst)
-{
-  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
-  const int src_slices = DivideRoundUp(weights.shape.i, 4);
-  const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
-
-  int counter = 0;
-  for (int j = 0; j < 4; ++j)
-  {
-    for (int z = 0; z < weights.shape.d; ++z)
-    {
-      for (int y = 0; y < weights.shape.h; ++y)
-      {
-        for (int x = 0; x < weights.shape.w; ++x)
-        {
-          for (int s = 0; s < src_slices; ++s)
-          {
-            for (int d = 0; d < dst_groups; ++d)
-            {
-              for (int d_group = 0; d_group < out_group_size; ++d_group)
-              {
-                T filter;
-                for (int i = 0; i < 4; ++i)
-                {
-                  const int s_ch = s * 4 + j;
-                  const int d_ch = (d * out_group_size + d_group) * 4 + i;
-                  if (s_ch < weights.shape.i && d_ch < weights.shape.o)
-                  {
-                    const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
-                    filter[i] = weights.data[f_index];
-                  }
-                  else
-                  {
-                    filter[i] = 0.0f;
-                  }
-                }
-                dst[counter++] = filter;
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc
deleted file mode 100644 (file)
index 5f1103a..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/WinogradUtil.h"
-
-#include <cmath>
-#include <vector>
-
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace
-{
-// Matrices for Winograd trasformations were computed with the method described
-// here https://openreview.net/pdf?id=H1ZaRZVKg
-std::vector<float> GetTransposedMatrixForWinograd(int width, int height)
-{
-  const float kDelta = std::sqrt(2.0f) / 2.0f;
-  std::vector<float> px(width);
-
-  px[0] = 0.0f;
-  const int points_count = (width - 1) / 2;
-  for (int i = 0; i < points_count; ++i)
-  {
-    px[i * 2 + 1] = kDelta * (i + 1.0f);
-    px[i * 2 + 2] = -kDelta * (i + 1.0f);
-  }
-  px[width - 1] = 1.0f;
-
-  std::vector<float> py(width, 1.0f);
-  py[width - 1] = 0.0f;
-
-  std::vector<float> result(height * width);
-  for (int y = 0; y < width; ++y)
-  {
-    for (int x = 0; x < height; ++x)
-    {
-      result[x * width + y] = std::pow(px[y], 1.0f * x) * std::pow(py[y], (height - 1.0f) - x);
-    }
-  }
-  return result;
-}
-
-std::vector<float> GetInversedMatrixForWinograd(int rank)
-{
-  auto matrix = GetTransposedMatrixForWinograd(rank, rank);
-  std::vector<float> inverted(rank * rank, 0.0f);
-  for (int i = 0; i < rank; ++i)
-  {
-    inverted[i * rank + i] = 1.0f;
-  }
-
-  for (int i = 1; i < rank - 1; ++i)
-  {
-    float inv_t = 1.0f / matrix[i * rank + i];
-    for (int x = i; x < rank; ++x)
-    {
-      matrix[i * rank + x] *= inv_t;
-    }
-    for (int x = 0; x < rank; ++x)
-    {
-      inverted[i * rank + x] *= inv_t;
-    }
-
-    for (int y = 0; y < rank; ++y)
-    {
-      if (y == i)
-        continue;
-      float t = matrix[y * rank + i];
-      for (int x = i; x < rank; ++x)
-      {
-        matrix[y * rank + x] -= t * matrix[i * rank + x];
-      }
-      for (int x = 0; x < rank; ++x)
-      {
-        inverted[y * rank + x] -= t * inverted[i * rank + x];
-      }
-    }
-  }
-
-  return inverted;
-}
-
-std::vector<float> Multiply(const std::vector<float> &a_mat, const std::vector<float> &b_mat, int m,
-                            int n, int k)
-{
-  std::vector<float> result(m * k);
-  for (int y = 0; y < m; ++y)
-  {
-    for (int x = 0; x < k; ++x)
-    {
-      float sum = 0.0f;
-      for (int i = 0; i < n; ++i)
-      {
-        sum += a_mat[y * n + i] * b_mat[i * k + x];
-      }
-      result[y * k + x] = sum;
-    }
-  }
-  return result;
-}
-} // namespace
-
-std::vector<float> AtMatrixForWinograd4x4To6x6() { return GetTransposedMatrixForWinograd(6, 4); }
-
-std::vector<float> BtMatrixForWinograd4x4To6x6() { return GetInversedMatrixForWinograd(6); }
-
-void RearrangeWeightsToWinograd4x4To6x6Weights(
-  const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights,
-  gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights)
-{
-  gpu_cl::OHWI dst_shape;
-  dst_shape.o = src_weights.shape.o;
-  dst_shape.h = 6;
-  dst_shape.w = 6;
-  dst_shape.i = src_weights.shape.i;
-  dst_weights->shape = dst_shape;
-  dst_weights->data.resize(dst_shape.DimensionsProduct());
-
-  auto gt_mat = GetTransposedMatrixForWinograd(6, 3);
-  std::vector<float> g_mat(gt_mat.size());
-  for (int y = 0; y < 3; ++y)
-  {
-    for (int x = 0; x < 6; ++x)
-    {
-      g_mat[x * 3 + y] = gt_mat[y * 6 + x];
-    }
-  }
-
-  for (int d = 0; d < src_weights.shape.o; ++d)
-  {
-    for (int s = 0; s < src_weights.shape.i; ++s)
-    {
-      std::vector<float> in_vals(9);
-      for (int y = 0; y < 3; ++y)
-      {
-        for (int x = 0; x < 3; ++x)
-        {
-          const int f_index = src_weights.shape.LinearIndex({d, y, x, s});
-          in_vals[y * 3 + x] = src_weights.data[f_index];
-        }
-      }
-
-      auto temp_vals = Multiply(g_mat, in_vals, 6, 3, 3);
-      auto out_vals = Multiply(temp_vals, gt_mat, 6, 3, 6);
-      for (int y = 0; y < 6; ++y)
-      {
-        for (int x = 0; x < 6; ++x)
-        {
-          const int f_index = dst_shape.LinearIndex({d, y, x, s});
-          dst_weights->data[f_index] = out_vals[y * 6 + x];
-        }
-      }
-    }
-  }
-}
-
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h
deleted file mode 100644 (file)
index 32e2176..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
-
-#include <vector>
-
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-
-// Matrices for Winograd trasformations received with method described here
-// https://openreview.net/pdf?id=H1ZaRZVKg
-
-// returns A transposed matrix(6 * 4) as array (24 values) for Winograd4x4To6x6
-std::vector<float> AtMatrixForWinograd4x4To6x6();
-
-// returns B transposed matrix(6 * 6) as array (36 values) for Winograd4x4To6x6
-std::vector<float> BtMatrixForWinograd4x4To6x6();
-
-void RearrangeWeightsToWinograd4x4To6x6Weights(
-  const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights,
-  gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights);
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc
deleted file mode 100644 (file)
index 847c2a2..0000000
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WorkgroupSelection.h"
-
-#include <math.h>
-
-#include <set>
-#include <vector>
-
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-template <typename T>
-void AddCornerCases(const T &grid, int max_work_group_total_size, const T &max_work_group_sizes,
-                    WorkGroupSizeAlignment x_alignment, WorkGroupSizeAlignment y_alignment,
-                    WorkGroupSizeAlignment z_alignment, std::vector<T> *work_groups)
-{
-  for (int x = 1; x <= 4; ++x)
-  {
-    for (int y = 1; y <= 4; ++y)
-    {
-      for (int z = 1; z <= 4; ++z)
-      {
-        u_int32_t wg_x = DivideRoundUp(grid.x, x);
-        u_int32_t wg_y = DivideRoundUp(grid.y, y);
-        u_int32_t wg_z = DivideRoundUp(grid.z, z);
-        if (wg_x > static_cast<u_int32_t>(max_work_group_sizes.x) ||
-            wg_y > static_cast<u_int32_t>(max_work_group_sizes.y) ||
-            wg_z > static_cast<u_int32_t>(max_work_group_sizes.z) ||
-            wg_x * wg_y * wg_z > static_cast<u_int32_t>(max_work_group_total_size))
-        {
-          continue;
-        }
-        if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % wg_x != 0)
-        {
-          continue;
-        }
-        if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % wg_y != 0)
-        {
-          continue;
-        }
-        if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % wg_z != 0)
-        {
-          continue;
-        }
-        work_groups->push_back({wg_x, wg_y, wg_z});
-      }
-    }
-  }
-
-  // this will add at least {1, 1, 1} always.
-  for (u_int32_t x = 1; x <= 4; ++x)
-  {
-    for (u_int32_t y = 1; y <= 4; ++y)
-    {
-      for (u_int32_t z = 1; z <= 4; ++z)
-      {
-        if (x > static_cast<u_int32_t>(max_work_group_sizes.x) ||
-            y > static_cast<u_int32_t>(max_work_group_sizes.y) ||
-            z > static_cast<u_int32_t>(max_work_group_sizes.z) ||
-            x * y * z > static_cast<u_int32_t>(max_work_group_total_size))
-        {
-          continue;
-        }
-        if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % x != 0)
-        {
-          continue;
-        }
-        if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % y != 0)
-        {
-          continue;
-        }
-        if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % z != 0)
-        {
-          continue;
-        }
-        work_groups->push_back({x, y, z});
-      }
-    }
-  }
-}
-
-std::vector<int> GetDivisors(int number)
-{
-  const int max_divisor = static_cast<int>(sqrt(number));
-  std::vector<int> divisors;
-  // we don't know the number of dividers, so it is just heuristic.
-  divisors.reserve(max_divisor / 3 + 1);
-  for (int i = 1; i <= max_divisor; ++i)
-  {
-    const int d = number / i;
-    if (i * d == number)
-    {
-      divisors.push_back(i);
-      if (d != i)
-      {
-        divisors.push_back(d);
-      }
-    }
-  }
-  return divisors;
-}
-
-std::vector<int> GetDivisorsForRange(int number, int range)
-{
-  const int last_number = number + range;
-  const int max_divisor = static_cast<int>(sqrt(last_number));
-  std::set<int> divisors;
-  for (int i = 1; i <= max_divisor; ++i)
-  {
-    const int reminder = number % i;
-    // iterate through numbers that divisible by i in our range;
-    const int first_number = number + (i - reminder) % i;
-    if (first_number <= last_number)
-    {
-      divisors.insert(i);
-    }
-    for (int j = first_number; j <= last_number; j += i)
-    {
-      const int d = j / i;
-      if (d != i)
-      {
-        divisors.insert(d);
-      }
-    }
-  }
-  return std::vector<int>(divisors.begin(), divisors.end());
-}
-
-} // namespace
-
-std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment)
-{
-  if (z_alignment == WorkGroupSizeAlignment::PRECISE)
-  {
-    // we will use for potential sizes, sizes that cover grid precisely
-    // work group size * k (k is integer) == grid_size
-    return GetDivisors(number);
-  }
-  else
-  {
-    // when we chose work group size we can use work group size that
-    //   work group size * k (k is integer) != grid_size (slightly bigger)
-    // so in this heuristic we trying to find potential size, that satisfies
-    //   to this : work group size * k (k is integer) <= grid_size + 5
-    //   and this : work group size * k (k is integer) >= grid_size
-    return GetDivisorsForRange(number, 5);
-  }
-}
-
-template <typename T>
-std::vector<T>
-GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size,
-                       const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment,
-                       WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment)
-{
-  std::vector<T> work_groups;
-  work_groups.reserve(64);
-
-  std::vector<int> sizes_x = GetPossibleSizes(grid.x, x_alignment);
-  std::vector<int> sizes_y = GetPossibleSizes(grid.y, y_alignment);
-  std::vector<int> sizes_z = GetPossibleSizes(grid.z, z_alignment);
-
-  for (auto x : sizes_x)
-  {
-    if (static_cast<int>(x) > static_cast<int>(max_work_group_sizes.x))
-      continue;
-    for (auto y : sizes_y)
-    {
-      if (static_cast<int>(y) > static_cast<int>(max_work_group_sizes.y))
-        continue;
-      for (auto z : sizes_z)
-      {
-        if (static_cast<int>(z) > static_cast<int>(max_work_group_sizes.z))
-          continue;
-        const int work_group_size = x * y * z;
-        if (work_group_size < min_work_group_total_size ||
-            work_group_size > max_work_group_total_size)
-          continue;
-        work_groups.push_back({x, y, z});
-      }
-    }
-  }
-
-  return work_groups;
-}
-
-// Specializations of GenerateWorkGroupSizes for int3 and uint3
-
-template std::vector<int3> GenerateWorkGroupSizes(const int3 &grid, int min_work_group_total_size,
-                                                  int max_work_group_total_size,
-                                                  const int3 &max_work_group_sizes,
-                                                  WorkGroupSizeAlignment x_alignment,
-                                                  WorkGroupSizeAlignment y_alignment,
-                                                  WorkGroupSizeAlignment z_alignment);
-
-template std::vector<uint3> GenerateWorkGroupSizes(const uint3 &grid, int min_work_group_total_size,
-                                                   int max_work_group_total_size,
-                                                   const uint3 &max_work_group_sizes,
-                                                   WorkGroupSizeAlignment x_alignment,
-                                                   WorkGroupSizeAlignment y_alignment,
-                                                   WorkGroupSizeAlignment z_alignment);
-
-template <typename T>
-void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size,
-                                         const int max_work_group_invocations,
-                                         std::vector<T> *work_groups)
-{
-  auto alignment = WorkGroupSizeAlignment::PRECISE;
-  *work_groups =
-    GenerateWorkGroupSizes<T>(grid, /*min_work_group_total_size = */ 32, max_work_group_invocations,
-                              max_work_group_size, alignment, alignment, alignment);
-  // If the grid parameter too small, method below cannot generate workgroups.
-  if (work_groups->empty())
-  {
-    AddCornerCases(grid, max_work_group_invocations, max_work_group_size, alignment, alignment,
-                   alignment, work_groups);
-  }
-}
-
-// Specializations of GenerateWorkGroupSizesAlignedToGrid for int3 and uint3
-
-template void GenerateWorkGroupSizesAlignedToGrid(const int3 &grid, const int3 &max_work_group_size,
-                                                  const int max_work_group_invocations,
-                                                  std::vector<int3> *work_groups);
-
-template void GenerateWorkGroupSizesAlignedToGrid(const uint3 &grid,
-                                                  const uint3 &max_work_group_size,
-                                                  const int max_work_group_invocations,
-                                                  std::vector<uint3> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h
deleted file mode 100644 (file)
index b0702ac..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
-
-#include <vector>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// PRECISE assume that WorkGroupSize * k = GridSize;
-// NO_ALIGNMENT no restrictions;
-// We need PRECISE when we don't have check in kernel for boundaries
-// If we have the check, we can use PRECISE or NO_ALIGNMENT as well.
-enum class WorkGroupSizeAlignment
-{
-  PRECISE,
-  NO_ALIGNMENT
-};
-
-std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment);
-
-// Specializations exist for int3 and uint3 in the .cc file
-
-template <typename T>
-std::vector<T>
-GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size,
-                       const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment,
-                       WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment);
-
-template <typename T>
-void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size,
-                                         const int max_work_group_invocations,
-                                         std::vector<T> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc
deleted file mode 100644 (file)
index 09100fe..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Add.h"
-
-#include <cstring>
-#include <string>
-
-#include "absl/strings/str_cat.h"
-#include "Util.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels,
-                       int dst_channels)
-{
-  GPUOperation add(definition);
-  int dst_depth = DivideRoundUp(dst_channels, 4);
-  int src0_depth = DivideRoundUp(channels[0], 4);
-  add.elementwise_ = true;
-  add.linkable_ = dst_depth == src0_depth;
-  if (src0_depth < dst_depth)
-  {
-    add.check_src_channels_size_ = true;
-  }
-  for (uint32_t i = 1; i < definition.src_tensors.size(); ++i)
-  {
-    const std::string tensor_name = absl::StrCat("src_data_", i);
-    auto src_desc = definition.src_tensors[i];
-    if (definition.IsBatchSupported())
-    {
-      src_desc.SetStateVar("BatchedWidth", "true");
-    }
-    add.AddSrcTensor(tensor_name, src_desc);
-    add.code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n";
-    add.code_ += "  in_out_value += args." + tensor_name + ".Read(X_COORD, Y_COORD, S_COORD);\n";
-    add.code_ += "}\n";
-  }
-  return add;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h
deleted file mode 100644 (file)
index 2335a90..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
-
-#include <string>
-#include <vector>
-
-#include "GpuOperation.h"
-#include "open_cl/Operations.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// Add operation supports not equal tensors on input (for possibility to
-// remove Padding operation with zeroes in channels dimension)
-GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels,
-                       int dst_channels);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc
deleted file mode 100644 (file)
index 1b9014f..0000000
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/kernels/ConvBuffer1x1.h"
-
-#include <array>
-#include <string>
-#include <utility>
-
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-// element_size must be 1, 2 or 4
-// 1 - is FLT4
-// 2 - is FLT8
-// 4 - is FLT16
-// This function generates code for arithmetic part of convolution
-std::string GetComputationPart(const int3 &block_size, int element_size,
-                               CalculationsPrecision precision)
-{
-  const std::string hexes[16] = {"0", "1", "2", "3", "4", "5", "6", "7",
-                                 "8", "9", "a", "b", "c", "d", "e", "f"};
-  std::string c;
-  for (int z = 0; z < block_size.z; ++z)
-  {
-    const std::string z_s = std::to_string(z);
-    c += "    FLT16 W" + z_s + " = weights_cache[" + z_s + "];\n";
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      for (int x = 0; x < block_size.x; ++x)
-      {
-        std::string s_index = std::to_string(y * block_size.x + x);
-        for (int e = 0; e < element_size; ++e)
-        {
-          std::string r_index = z_s + std::to_string(y) + std::to_string(x * element_size + e);
-          const std::string f0 = "W" + z_s + ".s0123";
-          const std::string f1 = "W" + z_s + ".s4567";
-          const std::string f2 = "W" + z_s + ".s89ab";
-          const std::string f3 = "W" + z_s + ".scdef";
-          switch (precision)
-          {
-            case CalculationsPrecision::F32:
-            case CalculationsPrecision::F16:
-              c += "    r" + r_index + " += " + f0 + " * s" + s_index + ".s" + hexes[e * 4 + 0] +
-                   ";\n";
-              c += "    r" + r_index + " += " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] +
-                   ";\n";
-              c += "    r" + r_index + " += " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] +
-                   ";\n";
-              c += "    r" + r_index + " += " + f3 + " * s" + s_index + ".s" + hexes[e * 4 + 3] +
-                   ";\n";
-              break;
-            case CalculationsPrecision::F32_F16:
-              c += "    r" + r_index + " += convert_float4(" + f0 + " * s" + s_index + ".s" +
-                   hexes[e * 4 + 0] + " + " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] +
-                   " + " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] + " + " + f3 + " * s" +
-                   s_index + ".s" + hexes[e * 4 + 3] + ");\n";
-              break;
-          }
-        }
-      }
-    }
-  }
-  return c;
-}
-
-ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info,
-                                        const OperationDef &definition, const BHWC &shape, int,
-                                        int dst_depth)
-{
-  ConvBuffer1x1::ConvParams conv_params;
-  conv_params.element_size = 4;
-  conv_params.block_size = int3(1, 1, 1);
-  if (!device_info.IsMali())
-  {
-    return conv_params;
-  }
-  bool can_use_flt8 =
-    (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32;
-  bool is_midgard = device_info.IsMali() && device_info.mali_info.IsMidgard();
-  if (is_midgard)
-  {
-    if (can_use_flt8)
-    {
-      conv_params.element_size = 8;
-    }
-    if (definition.precision == CalculationsPrecision::F16 || !can_use_flt8)
-    {
-      conv_params.block_size.x = 2;
-    }
-    return conv_params;
-  }
-
-  int task_size = shape.w * shape.b * shape.h * dst_depth;
-  int block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size);
-
-  if (!can_use_flt8 && block_size > 4)
-  {
-    block_size = 4;
-  }
-
-  if (can_use_flt8 && block_size >= 2)
-  {
-    conv_params.element_size = 8;
-    block_size /= 2;
-  }
-  if (block_size == 4)
-  {
-    conv_params.block_size.x = 2;
-    if (definition.precision == CalculationsPrecision::F32 && dst_depth < 32)
-    {
-      conv_params.block_size.y = 2;
-    }
-    else
-    {
-      conv_params.block_size.z = 2;
-    }
-  }
-  else if (block_size == 2)
-  {
-    if (dst_depth >= 32)
-    {
-      conv_params.block_size.z = 2;
-    }
-    else
-    {
-      conv_params.block_size.x = 2;
-    }
-  }
-
-  return conv_params;
-}
-
-ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info,
-                                        const OperationDef &definition, int, int)
-{
-  ConvBuffer1x1::ConvParams conv_params;
-  conv_params.element_size = 4;
-  conv_params.block_size = int3(1, 1, 1);
-  if (device_info.IsMali() && definition.precision == CalculationsPrecision::F16 &&
-      device_info.compute_units_count <= 4)
-  {
-    conv_params.block_size.x *= 2;
-  }
-  return conv_params;
-}
-
-} // namespace
-
-ConvBuffer1x1::ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params)
-  : GPUOperation(definition), conv_params_(conv_params)
-{
-  code_ = GenerateConvBuffer1x1(definition_, conv_params_, &args_);
-  work_group_size_ = int3(2, 4, 1);
-}
-
-ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1 &&operation)
-  : GPUOperation(std::move(operation)), conv_params_(std::move(operation.conv_params_))
-{
-}
-
-ConvBuffer1x1 &ConvBuffer1x1::operator=(ConvBuffer1x1 &&operation)
-{
-  if (this != &operation)
-  {
-    std::swap(conv_params_, operation.conv_params_);
-    GPUOperation::operator=(std::move(operation));
-  }
-  return *this;
-}
-
-std::string ConvBuffer1x1::GenerateConvBuffer1x1(const OperationDef &op_def,
-                                                 const ConvBuffer1x1::ConvParams &conv_params,
-                                                 Arguments *)
-{
-  auto src_desc = op_def.src_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  if (conv_params_.element_size == 8)
-  {
-    src_desc.SetStateVar("ElementsX2", "true");
-  }
-  else if (conv_params_.element_size == 16)
-  {
-    src_desc.SetStateVar("ElementsX4", "true");
-  }
-  AddSrcTensor("src_tensor", src_desc);
-  if (op_def.src_tensors.size() == 2)
-  {
-    // dynamic weights
-    BufferDescriptor desc;
-    desc.element_type = op_def.src_tensors[1].data_type;
-    desc.element_size = 16;
-    desc.memory_type = MemoryType::GLOBAL;
-    AddSrcBuffer("weights", desc);
-  }
-
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  AddDstTensor("dst_tensor", dst_desc);
-
-  std::string c = GetCommonDefines(op_def.precision);
-  switch (op_def.precision)
-  {
-    case CalculationsPrecision::F32:
-      c += "#define FLT8 float8\n";
-      c += "#define FLT16 float16\n";
-      break;
-    case CalculationsPrecision::F32_F16:
-    case CalculationsPrecision::F16:
-      c += "#define FLT8 half8\n";
-      c += "#define FLT16 half16\n";
-      break;
-  }
-
-  const int3 block_size = conv_params.block_size;
-  const int element_size = conv_params.element_size / 4;
-
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0) * " + std::to_string(block_size.x * element_size) + ";\n";
-  c += "  int X_SRC = get_global_id(0) * " + std::to_string(block_size.x) + ";\n";
-  c += "  int Y = get_global_id(1) * " + std::to_string(block_size.y) + ";\n";
-  c += "  int Z = get_global_id(2) * " + std::to_string(block_size.z) + ";\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "Z >= args.dst_tensor.Slices()) return;\n";
-  if (conv_params.different_weights_for_height)
-  {
-    c += "  __global FLT16* weights_cache = args.weights.GetPtr() + (Z * "
-         "args.src_tensor.Height() + "
-         "Y * " +
-         std::to_string(block_size.z) +
-         ") * "
-         "args.src_tensor.Slices();\n";
-  }
-  else
-  {
-    c += "  __global FLT16* weights_cache = args.weights.GetPtr() + Z * "
-         "args.src_tensor.Slices();\n";
-  }
-  for (int z = 0; z < block_size.z; ++z)
-  {
-    const std::string z_s = std::to_string(z);
-    c += "  ACCUM_FLT4 bias_val_" + z_s + " = TO_ACCUM_TYPE(args.biases.Read(Z + " + z_s + "));\n";
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      for (int x = 0; x < block_size.x * element_size; ++x)
-      {
-        c += "  ACCUM_FLT4 r" + z_s + std::to_string(y) + std::to_string(x) + " = bias_val_" + z_s +
-             ";\n";
-      }
-    }
-  }
-  for (int x = 0; x < block_size.x; ++x)
-  {
-    std::string x_s = std::to_string(x);
-    c += "  int xc" + x_s + " = min(X_SRC + " + std::to_string(x) +
-         ", args.src_tensor.Width() - 1);\n";
-  }
-  for (int y = 0; y < block_size.y; ++y)
-  {
-    std::string y_s = std::to_string(y);
-    c += "  int yc" + y_s + " = min(Y + " + y_s + ", args.src_tensor.Height() - 1);\n";
-  }
-  for (int y = 0; y < block_size.y; ++y)
-  {
-    std::string y_s = std::to_string(y);
-    for (int x = 0; x < block_size.x; ++x)
-    {
-      std::string x_s = std::to_string(x);
-      std::string i_s = std::to_string(y * block_size.x + x);
-      c += "  int src_addr_" + i_s + " = (yc" + y_s + ") * args.src_tensor.Width() + (xc" + x_s +
-           ");\n";
-    }
-  }
-  c += "  for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n";
-  for (int y = 0; y < block_size.y; ++y)
-  {
-    std::string y_s = std::to_string(y);
-    for (int x = 0; x < block_size.x; ++x)
-    {
-      std::string x_s = std::to_string(x);
-      std::string i_s = std::to_string(y * block_size.x + x);
-      c += "    FLT" + std::to_string(element_size * 4) + " s" + i_s +
-           " = args.src_tensor.Read(src_addr_" + i_s + ");\n";
-    }
-  }
-  c += GetComputationPart(block_size, element_size, op_def.precision);
-  for (int i = 0; i < block_size.x * block_size.y; ++i)
-  {
-    std::string i_s = std::to_string(i);
-    c += "    src_addr_" + i_s + " += args.src_tensor.SliceStride();\n";
-  }
-  c += "    weights_cache += " + std::to_string(block_size.z) + ";\n";
-  c += "  }\n"; // SRC_SLICES
-
-  for (int z = 0; z < block_size.z; ++z)
-  {
-    const std::string z_s = std::to_string(z);
-    if (z != 0)
-    {
-      c += "  if (Z + " + z_s + " >= args.dst_tensor.Slices()) return;\n";
-    }
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      const std::string y_s = std::to_string(y);
-      for (int x = 0; x < block_size.x * element_size; ++x)
-      {
-        const std::string x_s = std::to_string(x);
-        c += "  if (X + " + x_s + " < args.dst_tensor.Width() && Y + " + y_s +
-             " < args.dst_tensor.Height()) {\n";
-        c += "    FLT4 res = TO_FLT4(r" + z_s + y_s + x_s + ");\n";
-        c += "    args.dst_tensor.Write(res, X + " + x_s + ", Y + " + y_s + ", Z + " + z_s + ");\n";
-        c += "  }\n";
-      }
-    }
-  }
-  c += "}\n";
-  return c;
-}
-
-int3 ConvBuffer1x1::GetGridSize() const
-{
-  const int dst_width_elements =
-    DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), (conv_params_.element_size / 4));
-  const int grid_x = DivideRoundUp(dst_width_elements, conv_params_.block_size.x);
-  const int grid_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y);
-  const int grid_z = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.z);
-  return int3(grid_x, grid_y, grid_z);
-}
-
-void ConvBuffer1x1::GetPossibleKernelWorkGroups(TuningType tuning_type,
-                                                const DeviceInfo &device_info,
-                                                const KernelInfo &kernel_info,
-                                                std::vector<int3> *work_groups) const
-{
-  GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr)
-{
-  auto src_storage_type = definition.src_tensors[0].storage_type;
-  return src_storage_type == TensorStorageType::BUFFER && attr.weights.shape.w == 1 &&
-         attr.weights.shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 &&
-         attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 &&
-         attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 &&
-         attr.padding.appended.h == 0;
-}
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape,
-                              const Convolution2DAttributes &attr)
-{
-  auto src_storage_type = definition.src_tensors[0].storage_type;
-  return src_storage_type == TensorStorageType::BUFFER && weights_shape.w == 1 &&
-         weights_shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 &&
-         attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 &&
-         attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 &&
-         attr.padding.appended.h == 0;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
-                                  const Convolution2DAttributes &attr, const BHWC *shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  ConvBuffer1x1::ConvParams conv_params;
-  if (shape)
-  {
-    conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth);
-  }
-  else
-  {
-    conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
-  }
-  ConvBuffer1x1 result(definition, conv_params);
-  result.UploadData(attr.weights, attr.bias);
-  return result;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
-                                  const FullyConnectedAttributes &attr, const BHWC *shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  ConvBuffer1x1::ConvParams conv_params;
-  if (shape)
-  {
-    conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth);
-  }
-  else
-  {
-    conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
-  }
-  conv_params.block_size.x *= conv_params.block_size.y;
-  conv_params.block_size.y = 1;
-  ConvBuffer1x1 result(definition, conv_params);
-  result.UploadData(attr.weights, attr.bias);
-  return result;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info,
-                                              const OperationDef &definition,
-                                              const Convolution2DAttributes &attr,
-                                              const BHWC *shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  ConvBuffer1x1::ConvParams conv_params;
-  if (shape)
-  {
-    conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth);
-  }
-  else
-  {
-    conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
-  }
-  conv_params.block_size.x *= conv_params.block_size.y;
-  conv_params.block_size.y = 1;
-  conv_params.different_weights_for_height = true;
-  ConvBuffer1x1 result(definition, conv_params);
-  result.UploadDataForWinograd4x4To6x6(attr.weights);
-  return result;
-}
-
-ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info,
-                                                const OperationDef &definition,
-                                                const Convolution2DAttributes &attr,
-                                                const BHWC &weights_shape, const BHWC *dst_shape)
-{
-  const int dst_depth = DivideRoundUp(weights_shape.b, 4);
-  const int src_depth = DivideRoundUp(weights_shape.c, 4);
-  ConvBuffer1x1::ConvParams conv_params;
-  if (dst_shape)
-  {
-    conv_params = GetBestParams(device_info, definition, *dst_shape, src_depth, dst_depth);
-  }
-  else
-  {
-    conv_params = GetBestParams(device_info, definition, src_depth, dst_depth);
-  }
-  ConvBuffer1x1 result(definition, conv_params);
-  result.UploadBiases(attr.bias);
-  return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h
deleted file mode 100644 (file)
index 0abd605..0000000
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
-
-#include "open_cl/Buffer.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Precision.h"
-#include "open_cl/InternalTensor.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/WinogradUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConvBuffer1x1 : public GPUOperation
-{
-public:
-  ConvBuffer1x1() = default;
-
-  // Move only
-  ConvBuffer1x1(ConvBuffer1x1 &&operation);
-  ConvBuffer1x1 &operator=(ConvBuffer1x1 &&operation);
-  ConvBuffer1x1(const ConvBuffer1x1 &) = delete;
-  ConvBuffer1x1 &operator=(const ConvBuffer1x1 &) = delete;
-
-  void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                                   const KernelInfo &kernel_info,
-                                   std::vector<int3> *work_groups) const override;
-  int3 GetGridSize() const override;
-
-  ConvWeightsDescription GetConvWeightsDescription() const
-  {
-    ConvWeightsDescription desc;
-    desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4;
-    desc.output_group_size = conv_params_.block_size.z;
-    return desc;
-  }
-
-  struct ConvParams
-  {
-    int3 block_size = int3(1, 1, 1);
-    int element_size = 4; // can be 4, 8 or 16
-
-    // By default in 2d convolution we have the same weights for WH dims, but in
-    // some cases we need separate weights for H dimension and convolution
-    // kernel requires very small modifications to support it.
-    bool different_weights_for_height = false;
-  };
-
-private:
-  ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params);
-  friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info,
-                                           const OperationDef &definition,
-                                           const Convolution2DAttributes &attr, const BHWC *shape);
-  friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info,
-                                           const OperationDef &definition,
-                                           const FullyConnectedAttributes &attr, const BHWC *shape);
-  friend ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info,
-                                                       const OperationDef &definition,
-                                                       const Convolution2DAttributes &attr,
-                                                       const BHWC *shape);
-  friend ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info,
-                                                         const OperationDef &definition,
-                                                         const Convolution2DAttributes &attr,
-                                                         const BHWC &weights_shape,
-                                                         const BHWC *dst_shape);
-
-  template <DataType T>
-  void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases);
-  template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights);
-
-  template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights);
-
-  template <DataType T> void UploadBiases(const InternalTensor<Linear, T> &biases);
-
-  std::string GenerateConvBuffer1x1(const OperationDef &op_def,
-                                    const ConvBuffer1x1::ConvParams &conv_params, Arguments *args);
-
-  ConvParams conv_params_;
-};
-
-template <DataType T>
-void ConvBuffer1x1::UploadData(const InternalTensor<OHWI, T> &weights,
-                               const InternalTensor<Linear, T> &biases)
-{
-  UploadWeights(weights);
-  UploadBiases(biases);
-}
-
-template <DataType T>
-void ConvBuffer1x1::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights)
-{
-  InternalTensor<OHWI, T> wino_weights;
-  RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights);
-  UploadWeights(wino_weights);
-  InternalTensor<Linear, DataType::FLOAT32> bias;
-  bias.shape = Linear(weights.shape.o);
-  bias.data.resize(weights.shape.o, 0.0f);
-  UploadBiases(bias);
-}
-
-template <DataType T> void ConvBuffer1x1::UploadWeights(const InternalTensor<OHWI, T> &weights)
-{
-  const int dst_depth = DivideRoundUp(weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(weights.shape.i, 4);
-
-  const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
-  const int float4_size = sizeof(float4);
-  // TODO
-  // f32_weights ? sizeof(float4) : sizeof(half4);
-
-  const int dst_depth_aligned = AlignByN(dst_depth, conv_params_.block_size.z);
-  const int elements_count = weights.shape.h * weights.shape.w * src_depth * dst_depth_aligned * 4;
-
-  BufferDescriptor desc;
-  desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-  desc.element_size = 16;
-  desc.memory_type = MemoryType::GLOBAL;
-  desc.size = float4_size * elements_count;
-  desc.data.resize(desc.size);
-
-  if (f32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(desc.data.data());
-    RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z,
-                                     absl::MakeSpan(ptr, elements_count));
-  }
-  //   else
-  //   {
-  //     half4 *ptr = reinterpret_cast<half4 *>(desc.data.data());
-  //     RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z,
-  //                                      absl::MakeSpan(ptr, elements_count));
-  //   }
-
-  args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
-}
-
-template <DataType T> void ConvBuffer1x1::UploadBiases(const InternalTensor<Linear, T> &biases)
-{
-  TensorLinearDescriptor desc;
-  desc.storage_type = LinearStorageType::BUFFER;
-  desc.element_type = definition_.GetDataType();
-  int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4;
-  desc.UploadLinearData(biases, depth);
-  args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-}
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr);
-
-bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape,
-                              const Convolution2DAttributes &attr);
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
-                                  const Convolution2DAttributes &attr, const BHWC *shape = nullptr);
-
-ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition,
-                                  const FullyConnectedAttributes &attr,
-                                  const BHWC *shape = nullptr);
-
-ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info,
-                                                const OperationDef &definition,
-                                                const Convolution2DAttributes &attr,
-                                                const BHWC &weights_shape,
-                                                const BHWC *dst_shape = nullptr);
-
-ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info,
-                                              const OperationDef &definition,
-                                              const Convolution2DAttributes &attr,
-                                              const BHWC *shape = nullptr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h
deleted file mode 100644 (file)
index 4700381..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class ConvWeightsLayout
-{
-  kUnknown,
-  kOHWIOGroupI4O4,
-};
-
-struct ConvWeightsDescription
-{
-  ConvWeightsLayout layout;
-  int output_group_size;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc
deleted file mode 100644 (file)
index 0a51bab..0000000
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/kernels/ConvConstants.h"
-
-#include <string>
-#include <utility>
-
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-// Adreno can provide up to ~3-4KB of constant memory, but in some cases even
-// 3KB can have very bad performance.
-int GetAdrenoOptimalMaxConstantSize(int gpu_version)
-{
-  if (gpu_version < 600)
-  {
-    return 256 * 10; // 2.5KB
-  }
-  else
-  {
-    return 256 * 14; // 3.5KB
-  }
-}
-
-int GetOptimalMaxConstantSize(const DeviceInfo &info)
-{
-  if (!info.IsAdreno())
-  {
-    // In general we do not expect that this kernel will be used with non Adreno
-    // so as it tuned for __constant memory that have big profit on Adreno
-    return 1024; // 1KB
-  }
-  else
-  {
-    return GetAdrenoOptimalMaxConstantSize(info.adreno_info.gpu_version);
-  }
-}
-
-std::string GenerateConvolutionConstantCode(const OperationDef &op_def, const OHWI &weights_shape,
-                                            bool stride_correction, GPUOperation *op)
-{
-  auto src_desc = op_def.src_tensors[0];
-  src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddSrcTensor("src_tensor", src_desc);
-
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddDstTensor("dst_tensor", dst_desc);
-
-  std::string c = GetCommonDefines(op_def.precision);
-
-  const int out_z = DivideRoundUp(weights_shape.o, 4);
-  const std::string kOutZ = std::to_string(out_z);
-  const int src_depth = DivideRoundUp(weights_shape.i, 4);
-
-  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
-  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
-                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
-  switch (op_def.precision)
-  {
-    case CalculationsPrecision::F32:
-    case CalculationsPrecision::F16:
-      c += "#define CONV4(R, SRC, F, i) \\\n";
-      c += "  R += SRC.x * F[i + 0]; \\\n";
-      c += "  R += SRC.y * F[i + 1]; \\\n";
-      c += "  R += SRC.z * F[i + 2]; \\\n";
-      c += "  R += SRC.w * F[i + 3];   \n";
-
-      c += "#define CONV3(R, SRC, F, i) \\\n";
-      c += "  R += SRC.x * F[i + 0]; \\\n";
-      c += "  R += SRC.y * F[i + 1]; \\\n";
-      c += "  R += SRC.z * F[i + 2]; \n";
-
-      c += "#define CONV2(R, SRC, F, i) \\\n";
-      c += "  R += SRC.x * F[i + 0]; \\\n";
-      c += "  R += SRC.y * F[i + 1]; \n";
-
-      c += "#define CONV1(R, SRC, F, i) \\\n";
-      c += "  R += SRC * F[i + 0]; \n";
-      break;
-    case CalculationsPrecision::F32_F16:
-      c += "#define CONV4(R, SRC, F, i) \\\n";
-      c += "  R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]";
-      c += " + SRC.z * F[i + 2] + SRC.w * F[i + 3]);\n";
-
-      c += "#define CONV3(R, SRC, F, i) \\\n";
-      c += "  R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]";
-      c += " + SRC.z * F[i + 2]);\n";
-
-      c += "#define CONV2(R, SRC, F, i) \\\n";
-      c += "  R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]);\n";
-
-      c += "#define CONV1(R, SRC, F, i) \\\n";
-      c += "  R += convert_float4(SRC * F[i + 0]);\n";
-      break;
-  }
-
-  const std::string postfixes[] = {".x", ".xy", ".xyz", ""};
-
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  c += "  int Y = get_global_id(1);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
-       "return;\n";
-  if (stride_correction)
-  {
-    c += "  int start_x = " +
-         GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
-         ";\n";
-  }
-  else
-  {
-    if (op_def.IsBatchSupported())
-    {
-      c += "  int start_x = X * args.stride_x + args.padding_x * "
-           "args.src_tensor.Batch();\n";
-    }
-    else
-    {
-      c += "  int start_x = X * args.stride_x + args.padding_x;\n";
-    }
-  }
-  c += "  int start_y = Y * args.stride_y + args.padding_y;\n";
-  c += "  ACCUM_FLT4 r[" + kOutZ + "];\n";
-  c += "  for (int i = 0; i < " + kOutZ + "; ++i) {\n";
-  c += "    r[i] = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  c += "  }\n";
-  int filters_counter = 0;
-  for (int s = 0; s < src_depth; ++s)
-  {
-    const int ch_count = std::min(4, weights_shape.i - s * 4);
-    const std::string s_conv = "CONV" + std::to_string(ch_count);
-    const std::string s_count = ch_count == 1 ? "" : std::to_string(ch_count);
-    const std::string s_type = absl::StrCat("FLT", s_count);
-    const std::string s_postfix = postfixes[ch_count - 1];
-    const std::string dilation_x =
-      op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x";
-    for (int ky = 0; ky < weights_shape.h; ++ky)
-    {
-      std::string s_y = absl::StrCat("(start_y + ", ky, " * args.dilation_y)");
-      if (manual_clamp)
-      {
-        c += "  {\n";
-        c += "  bool y_out = " + s_y + " < 0 || " + s_y + " >= args.src_tensor.Height();\n";
-      }
-      for (int kx = 0; kx < weights_shape.w; ++kx)
-      {
-        c += "  {\n";
-        std::string s_x = absl::StrCat("(start_x + ", kx, " * " + dilation_x + ")");
-        if (manual_clamp)
-        {
-          c += "    bool x_out = " + s_x + "< 0 || " + s_x + ">= args.src_tensor.Width();\n";
-          c += "    " + s_type + " src = x_out || y_out ?";
-          c += "(" + s_type + ")(0.0) : args.src_tensor.Read(" + s_x + ", " + s_y + ", " +
-               std::to_string(s) + ")" + s_postfix + ";\n";
-        }
-        else
-        {
-          c += "    " + s_type + " src = args.src_tensor.Read(" + s_x + ", " + s_y + ", " +
-               std::to_string(s) + ")" + s_postfix + ";\n";
-        }
-        for (int d = 0; d < out_z; ++d)
-        {
-          c += "    " + s_conv + "(r[" + std::to_string(d) + "], src, args.weigths.GetPtr(),";
-          c += " " + std::to_string(filters_counter) + ");\n";
-          filters_counter += ch_count;
-        }
-        c += "  }\n";
-      }
-      if (manual_clamp)
-      {
-        c += "  }\n";
-      }
-    }
-  }
-  for (int i = 0; i < out_z; ++i)
-  {
-    std::string s_i = std::to_string(i);
-    c += "  {\n";
-    c += "    FLT4 res = TO_FLT4(r[" + s_i + "]) + args.biases.Read(" + s_i + ");\n";
-    c += "  args.dst_tensor.Write(res, X, Y, " + s_i + ");\n";
-    c += "  }\n";
-  }
-  c += "}\n";
-  return c;
-}
-
-} // namespace
-
-bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition,
-                              const Convolution2DAttributes &attr)
-{
-  if (device_info.IsAMD() && definition.precision != CalculationsPrecision::F32 &&
-      definition.src_tensors[0].storage_type != TensorStorageType::BUFFER)
-  {
-    // BUG, some AMD gpus crashe without it
-    return false;
-  }
-
-  const auto &w_shape = attr.weights.shape;
-  const int dst_channels = AlignByN(w_shape.o, 4);
-  const int filters_count = w_shape.i * dst_channels * w_shape.h * w_shape.w;
-  const int float_size = sizeof(float);
-  // TODO F32 and F16
-  // definition.precision == CalculationsPrecision::F32 ? sizeof(float) : sizeof(half);
-  const int filters_buffer_size = filters_count * float_size;
-  const int kConstantMaxSize = GetOptimalMaxConstantSize(device_info);
-  const int flt4_registers = DivideRoundUp(w_shape.o, 4);
-  return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8;
-}
-
-GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition,
-                                 const Convolution2DAttributes &attr)
-{
-  GPUOperation op(definition);
-  UploadWeightsForConvConstants(attr.weights, definition.precision, &op);
-  op.args_.AddInt("stride_x", attr.strides.w);
-  op.args_.AddInt("stride_y", attr.strides.h);
-  op.args_.AddInt("padding_x", -attr.padding.prepended.w);
-  op.args_.AddInt("padding_y", -attr.padding.prepended.h);
-  op.args_.AddInt("dilation_x", attr.dilations.w);
-  op.args_.AddInt("dilation_y", attr.dilations.h);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1;
-
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  op.code_ =
-    GenerateConvolutionConstantCode(definition, attr.weights.shape, stride_correction, &op);
-  if (definition.precision == CalculationsPrecision::F16 && device_info.IsAdreno3xx())
-  {
-    op.compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE);
-  }
-  if (definition.precision != CalculationsPrecision::F32 && device_info.IsPowerVR())
-  {
-    // BUG, some PowerVRs (GE8320) produce incorrect result without it
-    op.compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE);
-  }
-
-  TensorLinearDescriptor desc;
-  desc.storage_type = LinearStorageType::BUFFER;
-  desc.element_type = definition.GetDataType();
-  desc.memory_type = MemoryType::CONSTANT;
-  desc.UploadLinearData(attr.bias);
-  op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h
deleted file mode 100644 (file)
index be6670c..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__
-
-#include "open_cl/Buffer.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <DataType S, typename T>
-void RearrangeWeightsForConvConstants(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst)
-{
-  const int dst_depth = DivideRoundUp(weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(weights.shape.i, 4);
-  const int kernel_x = weights.shape.w;
-  const int kernel_y = weights.shape.h;
-
-  int counter = 0;
-  for (int s = 0; s < src_depth; ++s)
-  {
-    for (int y = 0; y < kernel_y; ++y)
-    {
-      for (int x = 0; x < kernel_x; ++x)
-      {
-        for (int d = 0; d < dst_depth; ++d)
-        {
-          const int channels_count = std::min(4, weights.shape.i - s * 4);
-          T filters[4];
-          for (int i = 0; i < 4; ++i)
-          {
-            for (int j = 0; j < channels_count; ++j)
-            {
-              const int s_ch = s * 4 + j;
-              const int d_ch = d * 4 + i;
-              if (s_ch < weights.shape.i && d_ch < weights.shape.o)
-              {
-                const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch});
-                filters[i][j] = weights.data[f_index];
-              }
-              else
-              {
-                filters[i][j] = 0.0f;
-              }
-            }
-          }
-          T filters_new[4];
-          for (int i = 0; i < 4; ++i)
-          {
-            for (int j = 0; j < 4; ++j)
-            {
-              filters_new[i][j] = filters[j][i];
-            }
-          }
-          for (int i = 0; i < channels_count; ++i)
-          {
-            dst[counter++] = filters_new[i];
-          }
-        }
-      }
-    }
-  }
-}
-
-template <DataType T>
-void UploadWeightsForConvConstants(const InternalTensor<OHWI, T> &weights,
-                                   CalculationsPrecision precision, GPUOperation *op)
-{
-  const int dst_depth = DivideRoundUp(weights.shape.o, 4);
-  const int kernel_x = weights.shape.w;
-  const int kernel_y = weights.shape.h;
-
-  const bool f32_weights = precision == CalculationsPrecision::F32;
-  const int float_size = f32_weights ? 4 : 2;
-  const int float_count = weights.shape.i * dst_depth * 4 * kernel_x * kernel_y;
-
-  BufferDescriptor desc;
-  desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-  desc.element_size = 4;
-  desc.memory_type = MemoryType::CONSTANT;
-  desc.size = float_size * float_count;
-  desc.data.resize(desc.size);
-
-  if (f32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(desc.data.data());
-    RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4));
-  }
-  //   else
-  //   {
-  //     half4 *ptr = reinterpret_cast<half4 *>(desc.data.data());
-  //     RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4));
-  //   }
-
-  op->args_.AddObject("weigths", absl::make_unique<BufferDescriptor>(std::move(desc)));
-}
-
-bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition,
-                              const Convolution2DAttributes &attr);
-
-GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition,
-                                 const Convolution2DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc
deleted file mode 100644 (file)
index 5cb0c27..0000000
+++ /dev/null
@@ -1,1653 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "open_cl/kernels/ConvPowervr.h"
-
-#include <algorithm>
-#include <string>
-#include <utility>
-
-#include "absl/strings/substitute.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-std::string GenerateUploadByThreads(const std::string &local_ptr_name,
-                                    const std::string &global_ptr_name,
-                                    const std::string &global_offset_name,
-                                    const std::string &lid_name, int total_work_items,
-                                    int elements_to_upload)
-{
-  std::string c;
-  std::string offset = global_offset_name.empty() ? "" : global_offset_name + " + ";
-  const int groups = elements_to_upload / total_work_items;
-  const int reminder = elements_to_upload % total_work_items;
-  for (int i = 0; i < groups; ++i)
-  {
-    c += "    " + local_ptr_name + "[" + lid_name + " + " + std::to_string(total_work_items * i) +
-         "] = " + global_ptr_name + "[" + offset + lid_name + " + " +
-         std::to_string(total_work_items * i) + "];\n";
-  }
-  if (reminder != 0)
-  {
-    c += "    if (" + lid_name + " < " + std::to_string(reminder) + ") {\n";
-    c += "      " + local_ptr_name + "[" + lid_name + " + " +
-         std::to_string(total_work_items * groups) + "] = " + global_ptr_name + "[" + offset +
-         lid_name + " + " + std::to_string(total_work_items * groups) + "];\n";
-    c += "    }\n";
-  }
-  return c;
-}
-
-std::string GenerateAsyncUpload(const std::string &local_ptr_name,
-                                const std::string &global_ptr_name,
-                                const std::string &global_offset_name, int elements_to_upload)
-{
-  std::string c;
-  std::string offset = global_offset_name.empty() ? "" : " + " + global_offset_name;
-  c += "    async_work_group_copy(" + local_ptr_name + ", " + global_ptr_name + offset + ", " +
-       std::to_string(elements_to_upload) + ", 0);\n";
-  return c;
-}
-
-std::string GenerateBlockCoords(const int4 &block_size, const int3 &work_group_launch_order,
-                                bool linear_spatial, bool need_depth)
-{
-  std::string c;
-  int3 launch_remap;
-  launch_remap[work_group_launch_order.x] = 0;
-  launch_remap[work_group_launch_order.y] = 1;
-  launch_remap[work_group_launch_order.z] = 2;
-  if (linear_spatial)
-  {
-    if (work_group_launch_order[0] == 0)
-    {
-      c += "  int linear_spatial = get_global_id(0);\n";
-    }
-    else
-    {
-      c += "  int linear_spatial = get_group_id(" + std::to_string(launch_remap[0]) +
-           ") * get_local_size(0) + get_local_id(0);\n";
-    }
-    if (need_depth)
-    {
-      c += "  int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) +
-           ";\n";
-      c += "  linear_spatial = linear_spatial / args.task_size_x;\n";
-      c += "  int DST_Y = (linear_spatial % args.task_size_y) * " + std::to_string(block_size.y) +
-           ";\n";
-      c += "  int DST_Z = (linear_spatial / args.task_size_y) * " + std::to_string(block_size.z) +
-           ";\n";
-    }
-    else
-    {
-      c += "  int DST_Y = (linear_spatial / args.task_size_x) * " + std::to_string(block_size.y) +
-           ";\n";
-      c += "  int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) +
-           ";\n";
-    }
-    if (work_group_launch_order[1] == 1)
-    {
-      c += "  int DST_S = get_global_id(1) * " + std::to_string(block_size.w) + ";\n";
-    }
-    else
-    {
-      c += "  int DST_S = (get_group_id(" + std::to_string(launch_remap[1]) +
-           ") * get_local_size(1) + get_local_id(1)) * " + std::to_string(block_size.w) + ";\n";
-    }
-  }
-  else
-  {
-    if (work_group_launch_order[0] == 0)
-    {
-      c += "  int DST_X = get_global_id(0) * " + std::to_string(block_size.x) + ";\n";
-    }
-    else
-    {
-      c += "  int DST_X = (get_group_id(" + std::to_string(launch_remap[0]) +
-           ") * get_local_size(0) + get_local_id(0)) * " + std::to_string(block_size.x) + ";\n";
-    }
-    std::string global_id_1;
-    if (work_group_launch_order[1] == 1)
-    {
-      global_id_1 = "get_global_id(1)";
-    }
-    else
-    {
-      global_id_1 = "(get_group_id(" + std::to_string(launch_remap[1]) +
-                    ") * get_local_size(1) + get_local_id(1))";
-    }
-    if (need_depth)
-    {
-      c += "  int linear_id_1 = " + global_id_1 + ";\n";
-      c +=
-        "  int DST_Z = (linear_id_1 / args.task_size_y) * " + std::to_string(block_size.z) + ";\n";
-      c +=
-        "  int DST_Y = (linear_id_1 % args.task_size_y) * " + std::to_string(block_size.y) + ";\n";
-    }
-    else
-    {
-      c += "  int DST_Y = " + global_id_1 + " * " + std::to_string(block_size.y) + ";\n";
-    }
-    if (work_group_launch_order[2] == 2)
-    {
-      c += "  int DST_S = get_global_id(2) * " + std::to_string(block_size.w) + ";\n";
-    }
-    else
-    {
-      c += "  int DST_S = (get_group_id(" + std::to_string(launch_remap[2]) +
-           ") * get_local_size(2) + get_local_id(2)) * " + std::to_string(block_size.w) + ";\n";
-    }
-  }
-
-  return c;
-}
-} // namespace
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
-                         const DeviceInfo &device_info, const BHWC *dst_shape)
-  : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1),
-    padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0),
-    kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 1, 1),
-    dilation_(attr.dilations.w, attr.dilations.h, 1, 1),
-    conv_params_(GuessBestParams(device_info, definition, attr, dst_shape))
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
-                         const BHWC &weights_shape, const DeviceInfo &device_info,
-                         const BHWC *dst_shape)
-  : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1),
-    padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0),
-    kernel_size_(weights_shape.w, weights_shape.h, 1, 1),
-    dilation_(attr.dilations.w, attr.dilations.h, 1, 1),
-    conv_params_(GuessBestParams(device_info, definition, attr, weights_shape, dst_shape))
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr,
-                         const DeviceInfo &device_info, const BHWC *dst_shape)
-  : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1),
-    dilation_(1, 1, 1, 1), conv_params_(GuessBestParams(device_info, definition, attr, dst_shape))
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition)
-  : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1),
-    dilation_(1, 1, 1, 1)
-{
-}
-
-ConvPowerVR::ConvPowerVR(ConvPowerVR &&operation)
-  : GPUOperation(std::move(operation)), stride_(operation.stride_), padding_(operation.padding_),
-    kernel_size_(operation.kernel_size_), dilation_(operation.dilation_),
-    conv_params_(operation.conv_params_)
-{
-}
-
-ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr,
-                         const DeviceInfo &device_info, const BHWDC *dst_shape)
-  : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 1),
-    padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, -attr.padding.prepended.d, 0),
-    kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d, 1),
-    dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 1),
-    conv_params_(GuessBestParams(device_info, definition, attr, dst_shape))
-{
-}
-
-ConvPowerVR &ConvPowerVR::operator=(ConvPowerVR &&operation)
-{
-  if (this != &operation)
-  {
-    std::swap(stride_, operation.stride_);
-    std::swap(padding_, operation.padding_);
-    std::swap(kernel_size_, operation.kernel_size_);
-    std::swap(dilation_, operation.dilation_);
-    std::swap(conv_params_, operation.conv_params_);
-    GPUOperation::operator=(std::move(operation));
-  }
-  return *this;
-}
-
-void ConvPowerVR::GenerateCode(const DeviceInfo &device_info)
-{
-  if (conv_params_.linear_spatial)
-  {
-    grid_dimension_ = 2;
-  }
-  const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1;
-  code_ = GenerateConv(device_info, definition_, stride_correction, conv_params_);
-  if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR())
-  {
-    compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
-  }
-  if (conv_params_.IsPrivateMemBroadcast() && device_info.IsCL20OrHigher())
-  {
-    compiler_options_.push_back(CompilerOptions::CL_2_0);
-  }
-  bool kernel_is_trivial = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1;
-  if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    kernel_is_trivial = kernel_is_trivial & conv_params_.z_kernel_is_1;
-  }
-  if (device_info.IsAdreno3xx() && definition_.precision == CalculationsPrecision::F16 &&
-      kernel_is_trivial)
-  {
-    compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE);
-  }
-}
-
-absl::Status ConvPowerVR::BindArguments(ArgumentsBinder *args)
-{
-  if (!conv_params_.x_kernel_is_1)
-  {
-    RETURN_IF_ERROR(args->SetInt("stride_x", stride_.x));
-    RETURN_IF_ERROR(args->SetInt("padding_x", padding_.x * src_[0]->Batch()));
-    RETURN_IF_ERROR(args->SetInt("kernel_size_x", kernel_size_.x));
-    RETURN_IF_ERROR(args->SetInt("dilation_x", dilation_.x * src_[0]->Batch()));
-  }
-  if (!conv_params_.y_kernel_is_1)
-  {
-    RETURN_IF_ERROR(args->SetInt("stride_y", stride_.y));
-    RETURN_IF_ERROR(args->SetInt("padding_y", padding_.y));
-    RETURN_IF_ERROR(args->SetInt("kernel_size_y", kernel_size_.y));
-    RETURN_IF_ERROR(args->SetInt("dilation_y", dilation_.y));
-  }
-  if (definition_.src_tensors[0].HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
-  {
-    RETURN_IF_ERROR(args->SetInt("stride_z", stride_.z));
-    RETURN_IF_ERROR(args->SetInt("padding_z", padding_.z));
-    RETURN_IF_ERROR(args->SetInt("kernel_size_z", kernel_size_.z));
-    RETURN_IF_ERROR(args->SetInt("dilation_z", dilation_.z));
-  }
-  if (conv_params_.linear_spatial)
-  {
-    const int grid_x =
-      DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x);
-    RETURN_IF_ERROR(args->SetInt("task_size_x", grid_x));
-  }
-  if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y);
-    RETURN_IF_ERROR(args->SetInt("task_size_y", task_size_y));
-  }
-  return absl::OkStatus();
-}
-
-int3 ConvPowerVR::GetGridSize() const
-{
-  const int task_size_x =
-    DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x);
-  const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y);
-  const int task_size_z = DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z);
-  const int task_size_s = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w);
-  int3 wg;
-
-  if (conv_params_.linear_spatial)
-  {
-    int grid_x = task_size_x * task_size_y;
-    if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      grid_x *= task_size_z;
-    }
-    return int3(grid_x, task_size_s, 1);
-  }
-  else
-  {
-    int grid_y = task_size_y;
-    if (definition_.src_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      grid_y *= task_size_z;
-    }
-    return int3(task_size_x, grid_y, task_size_s);
-  }
-}
-
-void ConvPowerVR::GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                                              const KernelInfo &kernel_info,
-                                              std::vector<int3> *work_groups) const
-{
-  if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP ||
-      conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_BY_THREADS ||
-      conv_params_.fixed_work_group_size)
-  {
-    work_groups->push_back(work_group_size_);
-    return;
-  }
-  GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-std::string ConvPowerVR::GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def,
-                                      bool stride_correction, const ConvParams &conv_params)
-{
-  auto src_desc = op_def.src_tensors[0];
-  src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  AddSrcTensor("src_tensor", src_desc);
-  if (op_def.src_tensors.size() == 2)
-  {
-    // dynamic weights
-    BufferDescriptor desc;
-    desc.element_type = op_def.src_tensors[1].data_type;
-    desc.element_size = 4;
-    desc.memory_type =
-      conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM
-        ? MemoryType::CONSTANT
-        : MemoryType::GLOBAL;
-
-    AddSrcBuffer("weights", desc);
-  }
-
-  const auto &src_def = op_def.src_tensors[0];
-
-  auto generate_id = [&](const std::string &x, const std::string &y, const std::string &z) {
-    std::string id;
-    if (src_def.HasAxis(Axis::WIDTH))
-    {
-      id += "_w" + x;
-    }
-    if (src_def.HasAxis(Axis::HEIGHT))
-    {
-      id += "_h" + y;
-    }
-    if (src_def.HasAxis(Axis::DEPTH))
-    {
-      id += "_d" + z;
-    }
-    return id;
-  };
-
-  auto generate_id_full = [&](const std::string &x, const std::string &y, const std::string &z,
-                              const std::string &s) { return generate_id(x, y, z) + "_s" + s; };
-
-  auto generate_check = [&](const std::string &x, const std::string &y, const std::string &z) {
-    std::string check;
-    const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH};
-    const std::vector<std::string> names{"in_x", "in_y", "in_z"};
-    const std::vector<bool> is_1{conv_params_.x_kernel_is_1, conv_params_.y_kernel_is_1,
-                                 conv_params_.z_kernel_is_1};
-    const std::vector<std::string> coords{x, y, z};
-    for (size_t i = 0; i < axes.size(); ++i)
-    {
-      const auto &axis = axes[i];
-      if (src_def.HasAxis(axis) && !src_def.SupportsZeroClamp(axis) && !is_1[i])
-      {
-        if (!check.empty())
-        {
-          check += " && ";
-        }
-        check += names[i] + coords[i];
-      }
-    }
-    return check;
-  };
-
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  AddDstTensor("dst_tensor", dst_desc);
-
-  if (!conv_params_.x_kernel_is_1)
-  {
-    args_.AddInt("stride_x");
-    args_.AddInt("padding_x");
-    args_.AddInt("kernel_size_x");
-    args_.AddInt("dilation_x");
-  }
-  if (!conv_params_.y_kernel_is_1)
-  {
-    args_.AddInt("stride_y");
-    args_.AddInt("padding_y");
-    args_.AddInt("kernel_size_y");
-    args_.AddInt("dilation_y");
-  }
-  if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
-  {
-    args_.AddInt("stride_z");
-    args_.AddInt("padding_z");
-    args_.AddInt("kernel_size_z");
-    args_.AddInt("dilation_z");
-  }
-  if (conv_params_.linear_spatial)
-  {
-    args_.AddInt("task_size_x");
-  }
-  if (src_def.HasAxis(Axis::DEPTH))
-  {
-    args_.AddInt("task_size_y");
-  }
-
-  const bool need_local_mem =
-    conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS ||
-    conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP;
-
-  const int local_mem_size = conv_params.block_size.w * 4 * conv_params.src_depth_loop_size;
-
-  const bool use_simd_broadcast = conv_params.IsPrivateMemBroadcast();
-  const int simd_size = conv_params.simd_size;
-
-  const bool late_oob_check = need_local_mem || use_simd_broadcast;
-
-  const std::string weights_space =
-    conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM ? "__constant"
-                                                                                    : "__global";
-
-  const std::string weights_data_type =
-    conv_params.weights_data_type == DataType::FLOAT32 ? "float4" : "half4";
-
-  const std::string weights_global_ptr = weights_space + " " + weights_data_type + "*";
-
-  std::string c = GetCommonDefines(op_def.precision);
-  if (use_simd_broadcast)
-  {
-    if (device_info.cl_version == OpenCLVersion::CL_2_0)
-    {
-      c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
-    }
-    else if (device_info.SupportsExtension("cl_intel_subgroups"))
-    {
-      c += "#pragma OPENCL EXTENSION cl_intel_subgroups : enable\n";
-    }
-  }
-  const int4 block_size = conv_params.block_size;
-  if (conv_params.fixed_work_group_size)
-  {
-    c += "__attribute__((reqd_work_group_size(" + std::to_string(work_group_size_.x) + ", " +
-         std::to_string(work_group_size_.y) + ", " + std::to_string(work_group_size_.z) + ")))\n";
-  }
-  if (use_simd_broadcast && device_info.IsIntel())
-  {
-    c += "__attribute__((intel_reqd_sub_group_size(" + std::to_string(simd_size) + ")))\n";
-  }
-  std::string dst_oob_check;
-  if (src_def.HasAxis(Axis::DEPTH))
-  {
-    if (conv_params.linear_spatial)
-    {
-      dst_oob_check = "DST_Z >= args.dst_tensor.Depth() || DST_S >= "
-                      "args.dst_tensor.Slices()";
-    }
-    else
-    {
-      dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Z >= "
-                      "args.dst_tensor.Depth() || DST_S >= args.dst_tensor.Slices()";
-    }
-  }
-  else
-  {
-    if (conv_params.linear_spatial)
-    {
-      dst_oob_check = "DST_Y >= args.dst_tensor.Height() || DST_S >= "
-                      "args.dst_tensor.Slices()";
-    }
-    else
-    {
-      dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Y >= "
-                      "args.dst_tensor.Height() || DST_S >= args.dst_tensor.Slices()";
-    }
-  }
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += GenerateBlockCoords(conv_params.block_size, work_group_launch_order_,
-                           conv_params.linear_spatial, src_def.HasAxis(Axis::DEPTH));
-  if (!late_oob_check)
-  {
-    c += "  if (" + dst_oob_check + ") {\n";
-    c += "    return;\n";
-    c += "  }\n";
-  }
-  if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
-  {
-    if (conv_params.linear_spatial)
-    {
-      c += "  int lid = get_local_id(0);\n";
-    }
-    else
-    {
-      c += "  int lid = get_local_id(1) * " + std::to_string(work_group_size_.x) +
-           " + get_local_id(0);\n";
-    }
-  }
-  if (use_simd_broadcast)
-  {
-    c += "  int simd_id = get_sub_group_local_id();\n";
-  }
-  for (int s = 0; s < block_size.w; ++s)
-  {
-    const std::string sind = std::to_string(s);
-    for (int z = 0; z < block_size.z; ++z)
-    {
-      const std::string zind = std::to_string(z);
-      for (int y = 0; y < block_size.y; ++y)
-      {
-        const std::string yind = std::to_string(y);
-        for (int x = 0; x < block_size.x; ++x)
-        {
-          const std::string xind = std::to_string(x);
-          c += "  ACCUM_FLT4 r" + generate_id_full(xind, yind, zind, sind) +
-               " = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-        }
-      }
-    }
-  }
-  if (!conv_params_.x_kernel_is_1)
-  {
-    for (int x = 0; x < block_size.x; ++x)
-    {
-      const std::string xind = std::to_string(x);
-      const std::string xc = "(DST_X + " + xind + ")";
-      if (stride_correction)
-      {
-        c += "  int xc" + xind + " = " +
-             GetXStrideCorrected(xc, "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
-             ";\n";
-      }
-      else
-      {
-        c += "  int xc" + xind + " = " + xc + " * args.stride_x + args.padding_x;\n";
-      }
-    }
-  }
-  else
-  {
-    for (int x = 0; x < block_size.x; ++x)
-    {
-      const std::string xind = std::to_string(x);
-      c += "  int xc" + xind + " = DST_X + " + xind + ";\n";
-      if (!src_def.CanReadOutOfBorder(Axis::WIDTH))
-      {
-        c += "  xc" + xind + " = clamp(xc" + xind + ", 0, args.src_tensor.Width() - 1);\n";
-      }
-    }
-  }
-  if (!conv_params_.y_kernel_is_1)
-  {
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      const std::string yind = std::to_string(y);
-      const std::string yc = "(DST_Y + " + yind + ")";
-      c += "  int yc" + yind + " = " + yc + " * args.stride_y + args.padding_y;\n";
-    }
-  }
-  else
-  {
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      const std::string yind = std::to_string(y);
-      c += "  int yc" + yind + " = DST_Y + " + yind + ";\n";
-      if (!src_def.CanReadOutOfBorder(Axis::HEIGHT))
-      {
-        c += "  yc" + yind + " = clamp(yc" + yind + ", 0, args.src_tensor.Height() - 1);\n";
-      }
-    }
-  }
-  if (src_def.HasAxis(Axis::DEPTH))
-  {
-    if (!conv_params_.z_kernel_is_1)
-    {
-      for (int z = 0; z < block_size.z; ++z)
-      {
-        const std::string zind = std::to_string(z);
-        const std::string zc = "(DST_Z + " + zind + ")";
-        c += "  int zc" + zind + " = " + zc + " * args.stride_z + args.padding_z;\n";
-      }
-    }
-    else
-    {
-      for (int z = 0; z < block_size.z; ++z)
-      {
-        const std::string zind = std::to_string(z);
-        c += "  int zc" + zind + " = DST_Z + " + zind + ";\n";
-        if (!src_def.CanReadOutOfBorder(Axis::DEPTH))
-        {
-          c += "  zc" + zind + " = clamp(zc" + zind + ", 0, args.src_tensor.Depth() - 1);\n";
-        }
-      }
-    }
-  }
-  bool trivial_kernel_size = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1;
-  if (src_def.HasAxis(Axis::DEPTH))
-  {
-    trivial_kernel_size = trivial_kernel_size && conv_params_.z_kernel_is_1;
-  }
-  if (need_local_mem)
-  {
-    c += "  __local " + weights_data_type + " weights_cache[" + std::to_string(local_mem_size) +
-         "];\n";
-  }
-  else if (conv_params.AreWeightsBuffer())
-  {
-    c += "    " + weights_global_ptr + " weights_cache;\n";
-  }
-  else if (!trivial_kernel_size)
-  {
-    c += "  int filter_offset = 0;\n";
-  }
-  if (conv_params.AreWeightsBuffer())
-  {
-    if (conv_params.different_weights_for_height)
-    {
-      c += "  " + weights_global_ptr +
-           " filters_loc = args.weights.GetPtr() + (DST_S * "
-           "args.src_tensor.Height() + DST_Y * " +
-           std::to_string(block_size.w) + ") * 4 * args.src_tensor.Slices();\n";
-    }
-    else
-    {
-      std::string kernel_spatial_offset = "";
-      if (!conv_params_.x_kernel_is_1)
-      {
-        kernel_spatial_offset += " * args.kernel_size_x";
-      }
-      if (!conv_params_.y_kernel_is_1)
-      {
-        kernel_spatial_offset += " * args.kernel_size_y";
-      }
-      if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
-      {
-        kernel_spatial_offset += " * args.kernel_size_z";
-      }
-      c += "  " + weights_global_ptr +
-           " filters_loc = args.weights.GetPtr() + DST_S * 4 * "
-           "args.src_tensor.Slices()" +
-           kernel_spatial_offset + ";\n";
-    }
-  }
-  if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
-  {
-    c += "  for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n";
-    for (int z = 0; z < block_size.z; ++z)
-    {
-      const std::string zck = "zck" + std::to_string(z);
-      c += "  int zck" + std::to_string(z) + " = kz * args.dilation_z + zc" + std::to_string(z) +
-           ";\n";
-      if (!src_def.SupportsZeroClamp(Axis::DEPTH))
-      {
-        c += "  bool in_z" + std::to_string(z) + " = " + zck + " >= 0 && " + zck +
-             " < args.src_tensor.Depth();\n";
-        if (!src_def.CanReadOutOfBorder(Axis::DEPTH))
-        {
-          c += "  " + zck + " = clamp(" + zck + ", 0, args.src_tensor.Depth() - 1);\n";
-        }
-      }
-    }
-  }
-  if (!conv_params_.y_kernel_is_1)
-  {
-    c += "  for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      const std::string yck = "yck" + std::to_string(y);
-      c += "  int " + yck + " = ky * args.dilation_y + yc" + std::to_string(y) + ";\n";
-      if (!src_def.SupportsZeroClamp(Axis::HEIGHT))
-      {
-        c += "  bool in_y" + std::to_string(y) + " = " + yck + " >= 0 && " + yck +
-             " < args.src_tensor.Height();\n";
-        if (!src_def.CanReadOutOfBorder(Axis::HEIGHT))
-        {
-          c += "  " + yck + " = clamp(" + yck + ", 0, args.src_tensor.Height() - 1);\n";
-        }
-      }
-    }
-  }
-  if (!conv_params_.x_kernel_is_1)
-  {
-    c += "  for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
-    for (int x = 0; x < block_size.x; ++x)
-    {
-      const std::string xck = "xck" + std::to_string(x);
-      c += "  int xck" + std::to_string(x) + " = kx * args.dilation_x + xc" + std::to_string(x) +
-           ";\n";
-      if (!src_def.SupportsZeroClamp(Axis::WIDTH))
-      {
-        c += "  bool in_x" + std::to_string(x) + " = " + xck + " >= 0 && " + xck +
-             " < args.src_tensor.Width();\n";
-        if (!src_def.CanReadOutOfBorder(Axis::WIDTH))
-        {
-          c += "  " + xck + " = clamp(" + xck + ", 0, args.src_tensor.Width() - 1);\n";
-        }
-      }
-    }
-  }
-  const bool need_multiple_slice_strides =
-    src_def.ReturnsZeroForNegOneRead() && !trivial_kernel_size;
-  for (int z = 0; z < block_size.z; ++z)
-  {
-    const std::string zind = std::to_string(z);
-    for (int y = 0; y < block_size.y; ++y)
-    {
-      const std::string yind = std::to_string(y);
-      for (int x = 0; x < block_size.x; ++x)
-      {
-        const std::string xind = std::to_string(x);
-        std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind;
-        std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind;
-        const std::string id = generate_id(xind, yind, zind);
-        std::string coords = "" + xc + ", " + yc;
-        if (src_def.HasAxis(Axis::DEPTH))
-        {
-          std::string zc = conv_params.z_kernel_is_1 ? "zc" + zind : "zck" + zind;
-          coords += ", " + zc;
-        }
-        if (src_def.IsLinear())
-        {
-          c += "  args.src_tensor.GetAddress(addr" + id + ", " + coords + ", 0);\n";
-          if (need_multiple_slice_strides)
-          {
-            const std::string check = generate_check(xind, yind, zind);
-            c += "  addr" + id + " = select(-1, addr" + id + ", (" + check + "));\n";
-            c +=
-              "  int ds" + id + " = select(0, args.src_tensor.SliceStride(), (" + check + "));\n";
-          }
-        }
-      }
-    }
-  }
-  if (src_def.IsLinear() && !need_multiple_slice_strides)
-  {
-    c += "  int ds = args.src_tensor.SliceStride();\n";
-  }
-
-  auto declare_src = [&]() {
-    for (int z = 0; z < block_size.z; ++z)
-    {
-      const std::string zind = std::to_string(z);
-      for (int y = 0; y < block_size.y; ++y)
-      {
-        const std::string yind = std::to_string(y);
-        for (int x = 0; x < block_size.x; ++x)
-        {
-          const std::string xind = std::to_string(x);
-          const std::string id = generate_id(xind, yind, zind);
-          c += "    " + weights_data_type + " src" + id + ";\n";
-        }
-      }
-    }
-  };
-  const bool conditional_read = device_info.IsMali();
-  auto read_src = [&]() {
-    const std::string cl_type = ToCLDataType(conv_params.weights_data_type);
-    for (int z = 0; z < block_size.z; ++z)
-    {
-      const std::string zind = std::to_string(z);
-      for (int y = 0; y < block_size.y; ++y)
-      {
-        const std::string yind = std::to_string(y);
-        for (int x = 0; x < block_size.x; ++x)
-        {
-          const std::string xind = std::to_string(x);
-          std::string id = generate_id(xind, yind, zind);
-          const std::string check = generate_check(xind, yind, zind);
-          std::string address;
-          if (src_def.IsLinear())
-          {
-            address = "addr" + id;
-          }
-          else
-          {
-            std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind;
-            std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind;
-            address = "" + xc + ", " + yc;
-            if (src_def.HasAxis(Axis::DEPTH))
-            {
-              std::string zc = conv_params.z_kernel_is_1 ? "zc" + zind : "zck" + zind;
-              address += ", " + zc;
-            }
-            address += ", s";
-          }
-          if (src_def.ReturnsZeroForNegOneRead())
-          {
-            c += "    src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n";
-            const std::string ds = trivial_kernel_size ? "ds" : "ds" + id;
-            c += "    " + address + " += " + ds + ";\n";
-          }
-          else
-          {
-            if (!check.empty())
-            {
-              if (conditional_read)
-              {
-                c += "    src" + id + " = " + check + " ? args.src_tensor.Read<" + cl_type + ">(" +
-                     address + ") : (FLT4)(0.0f);\n";
-              }
-              else
-              {
-                c += "    src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address +
-                     ") * (FLT)(" + check + ");\n";
-              }
-            }
-            else
-            {
-              c += "    src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n";
-            }
-            if (src_def.IsLinear())
-            {
-              c += "    " + address + " += ds;\n";
-            }
-          }
-        }
-      }
-    }
-  };
-  const bool weights_type_as_accum_type = !(op_def.precision == CalculationsPrecision::F32_F16 &&
-                                            conv_params.weights_data_type == DataType::FLOAT16);
-  auto conv_core = [&](int shared_offset) {
-    const std::string channels[] = {"x", "y", "z", "w"};
-    for (int s = 0; s < block_size.w; ++s)
-    {
-      const std::string sind = std::to_string(s);
-      if (weights_type_as_accum_type)
-      {
-        for (int ch = 0; ch < 4; ++ch)
-        {
-          for (int z = 0; z < block_size.z; ++z)
-          {
-            const std::string zind = std::to_string(z);
-            for (int y = 0; y < block_size.y; ++y)
-            {
-              const std::string yind = std::to_string(y);
-              for (int x = 0; x < block_size.x; ++x)
-              {
-                const std::string xind = std::to_string(x);
-                std::string R = "r" + generate_id_full(xind, yind, zind, sind);
-                std::string S = "src" + generate_id(xind, yind, zind);
-                if (use_simd_broadcast)
-                {
-                  int simd_id = (s * 4 + ch + shared_offset) / simd_size;
-                  int thread_id = (s * 4 + ch + shared_offset) % simd_size;
-                  std::string w_val_x = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
-                                        ".x, " + std::to_string(thread_id) + "u)";
-                  std::string w_val_y = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
-                                        ".y, " + std::to_string(thread_id) + "u)";
-                  std::string w_val_z = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
-                                        ".z, " + std::to_string(thread_id) + "u)";
-                  std::string w_val_w = "sub_group_broadcast(simd_w" + std::to_string(simd_id) +
-                                        ".w, " + std::to_string(thread_id) + "u)";
-                  c += "    " + R + ".x += " + w_val_x + " * " + S + "." + channels[ch] + ";\n";
-                  c += "    " + R + ".y += " + w_val_y + " * " + S + "." + channels[ch] + ";\n";
-                  c += "    " + R + ".z += " + w_val_z + " * " + S + "." + channels[ch] + ";\n";
-                  c += "    " + R + ".w += " + w_val_w + " * " + S + "." + channels[ch] + ";\n";
-                }
-                else
-                {
-                  const std::string weight_id = std::to_string(s * 4 + ch + shared_offset);
-                  std::string w_val;
-                  if (conv_params.AreWeightsBuffer())
-                  {
-                    w_val = "weights_cache[" + weight_id + "]";
-                  }
-                  else
-                  {
-                    w_val = "f" + weight_id;
-                  }
-                  c += "    " + R + " += " + w_val + " * " + S + "." + channels[ch] + ";\n";
-                }
-              }
-            }
-          }
-        }
-      }
-      else
-      { // F32_F16 precision and weights type is float16
-        for (int z = 0; z < block_size.z; ++z)
-        {
-          const std::string zind = std::to_string(z);
-          for (int y = 0; y < block_size.y; ++y)
-          {
-            const std::string yind = std::to_string(y);
-            for (int x = 0; x < block_size.x; ++x)
-            {
-              const std::string xind = std::to_string(x);
-              std::string R = "r" + generate_id_full(xind, yind, zind, sind);
-              std::string S = "src" + generate_id(xind, yind, zind);
-              std::vector<std::string> F(4);
-              for (int i = 0; i < 4; ++i)
-              {
-                std::string weight_id = std::to_string(s * 4 + i + shared_offset);
-                if (conv_params.AreWeightsBuffer())
-                {
-                  F[i] = "weights_cache[" + weight_id + "]";
-                }
-                else
-                {
-                  F[i] = "f" + weight_id;
-                }
-              }
-              c += "    " + R + " += convert_float4(" + S + ".x * " + F[0] + " + " + S + ".y * " +
-                   F[1] + " + " + S + ".z * " + F[2] + " + " + S + ".w * " + F[3] + ");\n";
-            }
-          }
-        }
-      }
-    }
-  };
-
-  c += "  int s = 0;\n";
-  c += "  do {\n";
-  declare_src();
-  const int total_work_items = work_group_size_.x * work_group_size_.y * work_group_size_.z;
-  if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP)
-  {
-    c += GenerateAsyncUpload("weights_cache", "filters_loc",
-                             /*global_offset_name*/ "", local_mem_size);
-  }
-  else if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
-  {
-    c += "    barrier(CLK_LOCAL_MEM_FENCE);\n";
-    c +=
-      GenerateUploadByThreads("weights_cache", "filters_loc",
-                              /*global_offset_name*/ "", "lid", total_work_items, local_mem_size);
-  }
-  else if (use_simd_broadcast)
-  {
-    int parts = local_mem_size / simd_size;
-    int reminder = local_mem_size % simd_size;
-    for (int i = 0; i < parts; ++i)
-    {
-      c += "    FLT4 simd_w" + std::to_string(i) + " = filters_loc[simd_id + " +
-           std::to_string(i * simd_size) + "];\n";
-    }
-    if (reminder)
-    {
-      c += "    FLT4 simd_w" + std::to_string(parts) + ";\n";
-      c += "    if (simd_id < " + std::to_string(reminder) + ") {\n";
-      c += "      simd_w" + std::to_string(parts) + " = filters_loc[simd_id + " +
-           std::to_string(parts * simd_size) + "];\n";
-      c += "    }\n";
-    }
-  }
-  else if (conv_params.AreWeightsBuffer())
-  { // GLOBAL_MEM/CONSTANT_MEM
-    c += "    weights_cache = filters_loc;\n";
-  }
-  else
-  { // TEXTURES_MEM
-    for (int dst_s = 0; dst_s < block_size.w; ++dst_s)
-    {
-      std::string f_y = trivial_kernel_size ? "s" : "filter_offset";
-      if (conv_params.different_weights_for_height)
-      {
-        f_y = "DST_Y * args.src_tensor.Slices() + s";
-      }
-      c += absl::Substitute(
-        R"(    FLT4 f$2 = args.weights0.Read(DST_S + $0, $1);
-    FLT4 f$3 = args.weights1.Read(DST_S + $0, $1);
-    FLT4 f$4 = args.weights2.Read(DST_S + $0, $1);
-    FLT4 f$5 = args.weights3.Read(DST_S + $0, $1);
-)",
-        dst_s, f_y, dst_s * 4 + 0, dst_s * 4 + 1, dst_s * 4 + 2, dst_s * 4 + 3);
-    }
-    if (!trivial_kernel_size)
-    {
-      c += "    filter_offset++;\n";
-    }
-  }
-  read_src();
-  c += "    s += 1;\n";
-  if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
-  {
-    c += "    barrier(CLK_LOCAL_MEM_FENCE);\n";
-  }
-  conv_core(0);
-  for (int i = 1; i < conv_params.src_depth_loop_size; ++i)
-  {
-    read_src();
-    conv_core(i * block_size.w * 4);
-    c += "    s += 1;\n";
-  }
-  if (conv_params.AreWeightsBuffer())
-  {
-    c += "    filters_loc += " + std::to_string(local_mem_size) + ";\n";
-  }
-  c += "  } while (s < args.src_tensor.Slices());\n";
-  if (!conv_params.x_kernel_is_1)
-  {
-    c += "  };\n";
-  }
-  if (!conv_params.y_kernel_is_1)
-  {
-    c += "  };\n";
-  }
-  if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1)
-  {
-    c += "  };\n";
-  }
-  if (conv_params.AreWeightsBuffer())
-  {
-    if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP)
-    {
-      c += GenerateAsyncUpload("weights_cache", "args.biases.GetPtr()", "DST_S", block_size.w);
-    }
-    else if (conv_params.weights_upload_type ==
-             ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS)
-    {
-      c += "  barrier(CLK_LOCAL_MEM_FENCE);\n";
-      c += GenerateUploadByThreads("weights_cache", "args.biases.GetPtr()", "DST_S", "lid",
-                                   total_work_items, block_size.w);
-      c += "  barrier(CLK_LOCAL_MEM_FENCE);\n";
-    }
-    else
-    {
-      c += "  weights_cache = args.biases.GetPtr() + DST_S;\n";
-    }
-  }
-  if (late_oob_check)
-  {
-    c += "  if (" + dst_oob_check + ") {\n";
-    c += "    return;\n";
-    c += "  }\n";
-  }
-
-  auto generate_dst_check = [&](int x, int y, int z) {
-    std::string check;
-    const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH};
-    const std::vector<std::string> names{"Width()", "Height()", "Depth()"};
-    std::vector<std::string> coords(3);
-    coords[0] = "DST_X + " + std::to_string(x);
-    coords[1] = "DST_Y + " + std::to_string(y);
-    coords[2] = "DST_Z + " + std::to_string(z);
-    const std::vector<int> ids{x, y, z};
-    for (size_t i = 0; i < axes.size(); ++i)
-    {
-      const auto &axis = axes[i];
-      if (src_def.HasAxis(axis) && ids[i] != 0)
-      {
-        if (!check.empty())
-        {
-          check += " && ";
-        }
-        check += coords[i] + " < args.dst_tensor." + names[i];
-      }
-    }
-    return check;
-  };
-
-  for (int s = 0; s < block_size.w; ++s)
-  {
-    const std::string sind = std::to_string(s);
-    c += "  if (DST_S + " + sind + " >= args.dst_tensor.Slices()) return;\n";
-    c += "  {\n";
-    if (conv_params.AreWeightsBuffer())
-    {
-      c += "    FLT4 bias_val = TO_FLT4(weights_cache[" + sind + "]);\n";
-    }
-    else
-    {
-      c += "    FLT4 bias_val = args.biases.Read(DST_S + " + sind + ");\n";
-    }
-    for (int z = 0; z < block_size.z; ++z)
-    {
-      const std::string zind = std::to_string(z);
-      for (int y = 0; y < block_size.y; ++y)
-      {
-        const std::string yind = std::to_string(y);
-        for (int x = 0; x < block_size.x; ++x)
-        {
-          const std::string xind = std::to_string(x);
-          const std::string id = generate_id_full(xind, yind, zind, sind);
-          const std::string check = generate_dst_check(x, y, z);
-          std::string coords = "DST_X + " + xind + ", DST_Y + " + yind;
-          if (src_def.HasAxis(Axis::DEPTH))
-          {
-            coords += ", DST_Z + " + zind;
-          }
-          coords += ", DST_S + " + sind;
-          if (!check.empty())
-          {
-            c += "  if (" + check + ") {\n";
-          }
-          else
-          {
-            c += "  {\n";
-          }
-          c += "    FLT4 res = TO_FLT4(r" + id + ") + bias_val;\n";
-          c += "    args.dst_tensor.Write(res, " + coords + ");\n";
-          c += "  }\n";
-        }
-      }
-    }
-    c += "  }\n";
-  }
-  c += "}\n";
-  return c;
-}
-
// Heuristically chooses convolution tuning parameters for the target GPU.
// Decides the output block size (block_size: x/y spatial, z depth, w dst
// slices), the weights upload strategy, and how many source-depth slices the
// inner loop consumes per iteration (src_depth_loop_size).
//
// Side effects: also writes the work_group_size_ and work_group_launch_order_
// members of this operation.
//
// Parameters:
//   src_depth / dst_depth - channel counts in 4-element slices.
//   x_kernel_is_1 / y_kernel_is_1 - true when the kernel axis is trivial
//     (size 1, stride 1, no dilation/padding), enabling simpler addressing.
//   different_weights_for_height - per-row weights (Winograd-style path).
//   dst_shape - optional output shape used to estimate total work; may be null.
ConvPowerVR::ConvParams
ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
                             int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1,
                             bool different_weights_for_height, const BHWC *dst_shape)
{
  ConvParams conv_params;
  conv_params.linear_spatial = false;
  conv_params.weights_data_type = DeduceDataTypeFromPrecision(definition.precision);
  conv_params.x_kernel_is_1 = x_kernel_is_1;
  conv_params.y_kernel_is_1 = y_kernel_is_1;
  conv_params.different_weights_for_height = different_weights_for_height;
  if (device_info.IsNvidia())
  {
    // Nvidia: 32-wide work groups, weights staged to local memory by threads.
    if (different_weights_for_height)
    {
      work_group_size_ = int3(32, 1, 1);
      work_group_launch_order_ = int3(2, 0, 1);
      conv_params.fixed_work_group_size = true;
    }
    else
    {
      conv_params.linear_spatial = true;
      work_group_size_ = int3(32, 1, 1);
      work_group_launch_order_ = int3(1, 0, 2);
      conv_params.fixed_work_group_size = true;
    }
    conv_params.block_size = int4(2, 1, 1, 4);
    conv_params.src_depth_loop_size = 1;
    conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
    // Pick the dst-slice block factor that divides evenly (or is amortized by
    // a large enough dst_depth).
    if (dst_depth % 4 == 0 || dst_depth >= 8)
    {
      conv_params.block_size.w = 4;
    }
    else if (dst_depth % 2 == 0 || dst_depth >= 4)
    {
      conv_params.block_size.w = 2;
    }
    else
    {
      conv_params.block_size.w = dst_depth;
    }
    if (dst_shape)
    {
      // Shrink the block when occupancy would be too low for this task size.
      int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
      float task_size_per_cu = static_cast<float>(task_size) / device_info.compute_units_count;
      int block_size =
        conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.w;
      float threads_per_cu = task_size_per_cu / block_size;
      float warps_per_cu = threads_per_cu / 32 /*warp_size*/;
      if (warps_per_cu < 8.0f)
      {
        conv_params.block_size.x = 1;
      }
      if (warps_per_cu < 4.0f && conv_params.block_size.w >= 4)
      {
        conv_params.block_size.w /= 2;
      }
      if (warps_per_cu < 2.0f && conv_params.block_size.w >= 2)
      {
        conv_params.block_size.w /= 2;
      }
    }
    if (src_depth % 2 == 0)
    {
      conv_params.src_depth_loop_size = 2;
    }
    if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
    {
      conv_params.src_depth_loop_size = 4;
    }
  }
  else if (device_info.IsPowerVR())
  {
    // PowerVR: async subgroup upload of weights; FP16 weights in F16 mode.
    if (different_weights_for_height)
    {
      work_group_size_ = int3(32, 1, 1);
      work_group_launch_order_ = int3(2, 0, 1);
      conv_params.fixed_work_group_size = true;
    }
    else
    {
      conv_params.linear_spatial = true;
      work_group_size_ = int3(32, 1, 1);
      work_group_launch_order_ = int3(1, 0, 2);
      conv_params.fixed_work_group_size = true;
    }
    conv_params.weights_data_type =
      definition.precision == CalculationsPrecision::F16 ? DataType::FLOAT16 : DataType::FLOAT32;
    conv_params.block_size = int4(1, 1, 1, 4);
    conv_params.src_depth_loop_size = 1;
    conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP;
    if (dst_depth % 8 == 0 || dst_depth >= 32)
    {
      conv_params.block_size.w = 8;
    }
    else if (dst_depth % 4 == 0 || dst_depth >= 8)
    {
      conv_params.block_size.w = 4;
    }
    else if (dst_depth % 2 == 0 || dst_depth >= 4)
    {
      conv_params.block_size.w = 2;
    }
    else
    {
      conv_params.block_size.w = dst_depth;
    }
    if (definition.precision == CalculationsPrecision::F16)
    {
      // F16: cap dst block at 4 and unroll the src-depth loop more
      // aggressively; widen x instead.
      conv_params.block_size.w = std::min(4, conv_params.block_size.w);
      if (src_depth % 2 == 0)
      {
        conv_params.src_depth_loop_size = 2;
      }
      if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
      {
        conv_params.src_depth_loop_size = 4;
      }
      if (conv_params.block_size.w == 1)
      {
        if (src_depth % 2 == 0)
        {
          conv_params.src_depth_loop_size = 2;
        }
        if (src_depth % 4 == 0)
        {
          conv_params.src_depth_loop_size = 4;
        }
        if (src_depth <= 8)
        {
          conv_params.src_depth_loop_size = src_depth;
        }
      }
      conv_params.block_size.x = 2;
    }
  }
  else if (device_info.IsAMD())
  {
    // AMD: weights read from constant memory; modest spatial blocking.
    if (different_weights_for_height)
    {
      work_group_size_ = int3(32, 1, 1);
      work_group_launch_order_ = int3(2, 0, 1);
      conv_params.fixed_work_group_size = true;
    }
    else
    {
      work_group_size_ = int3(8, 4, 1);
      work_group_launch_order_ = int3(2, 0, 1);
      conv_params.fixed_work_group_size = true;
    }

    conv_params.block_size = int4(2, 1, 1, 1);
    if (x_kernel_is_1 && y_kernel_is_1)
    {
      conv_params.block_size.y = 2;
    }
    conv_params.src_depth_loop_size = 1;
    conv_params.weights_upload_type = WeightsUploadType::CONSTANT_MEM;
    if (dst_depth % 8 == 0 || dst_depth >= 32)
    {
      conv_params.block_size.w = 8;
    }
    else if (dst_depth % 4 == 0 || dst_depth >= 8)
    {
      conv_params.block_size.w = 4;
    }
    else if (dst_depth % 2 == 0 || dst_depth >= 4)
    {
      conv_params.block_size.w = 2;
    }
    else
    {
      conv_params.block_size.w = 1;
    }
    if (src_depth % 2 == 0 && src_depth >= 16)
    {
      conv_params.src_depth_loop_size = 2;
    }
  }
  else if (device_info.IsMali())
  {
    // Mali: block size driven by a helper estimate of total work; dst_depth
    // 1 and 3 avoid a dst-slice block of 2 (it would not divide evenly).
    int block_size = 2;
    if (dst_shape)
    {
      int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth;
      block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size);
    }
    if (!x_kernel_is_1 || !y_kernel_is_1)
    {
      block_size = std::min(block_size, 4);
    }
    if (block_size == 8)
    {
      if (dst_depth == 1 || dst_depth == 3)
      {
        conv_params.block_size = int4(2, 2, 1, 1);
      }
      else
      {
        conv_params.block_size = int4(2, 2, 1, 2);
      }
    }
    else if (block_size == 4)
    {
      if (dst_depth == 1 || dst_depth == 3)
      {
        conv_params.block_size = int4(2, 2, 1, 1);
      }
      else
      {
        conv_params.block_size = int4(2, 1, 1, 2);
      }
    }
    else if (block_size == 2)
    {
      conv_params.block_size = int4(2, 1, 1, 1);
    }
    else
    {
      conv_params.block_size = int4(1, 1, 1, 1);
    }
    conv_params.src_depth_loop_size = 1;
    MaliInfo mali_info = device_info.mali_info;
    if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard())
    {
      conv_params.src_depth_loop_size = 2;
    }
    if (src_depth % 4 == 0 && block_size == 1 && !mali_info.IsMidgard() &&
        definition.precision == CalculationsPrecision::F16)
    {
      conv_params.src_depth_loop_size = 4;
    }
    work_group_size_ = int3(4, 4, 1);
    work_group_launch_order_ = int3(0, 1, 2);
    conv_params.fixed_work_group_size = false;
    conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
  }
  else if (device_info.IsAdreno())
  {
    // Adreno: prefer texture-based weights except when weights are a runtime
    // input tensor (second src tensor), which requires buffers.
    conv_params.block_size = int4(2, 2, 1, 2);
    if (device_info.IsAdreno3xx())
    {
      if (definition.precision == CalculationsPrecision::F16)
      {
        conv_params.block_size = int4(2, 2, 1, 2);
      }
      else if (definition.precision == CalculationsPrecision::F32_F16)
      {
        conv_params.block_size = int4(2, 1, 1, 2);
      }
      else
      { // F32
        conv_params.block_size = int4(2, 2, 1, 1);
      }
    }
    work_group_size_ = int3(8, 2, 1);
    work_group_launch_order_ = int3(0, 1, 2);
    conv_params.fixed_work_group_size = false;
    conv_params.src_depth_loop_size = 1;
    if (definition.src_tensors.size() == 2)
    {
      // dynamic weights supported only with buffers.
      conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
    }
    else
    {
      conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM_X4;
    }
  }
  else if (device_info.IsIntel())
  {
    // Intel: use SIMD broadcast of weights when the required subgroup
    // extensions are available (and precision is not mixed F32_F16);
    // otherwise fall back to thread-cooperative local-memory upload.
    if (different_weights_for_height)
    {
      work_group_size_ = int3(16, 1, 1);
      work_group_launch_order_ = int3(0, 1, 2);
      conv_params.fixed_work_group_size = true;
    }
    else
    {
      conv_params.linear_spatial = true;
      work_group_size_ = int3(16, 1, 1);
      work_group_launch_order_ = int3(0, 1, 2);
      conv_params.fixed_work_group_size = true;
    }
    conv_params.block_size = int4(1, 1, 1, 4);
    conv_params.src_depth_loop_size = 1;
    int sub_group_size = 16;
    const bool supports_subgroups = device_info.SupportsExtension("cl_khr_subgroups") ||
                                    device_info.SupportsExtension("cl_intel_subgroups");
    if (definition.precision != CalculationsPrecision::F32_F16 && supports_subgroups &&
        device_info.SupportsExtension("cl_intel_required_subgroup_size") &&
        device_info.SupportsSubGroupWithSize(sub_group_size))
    {
      conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST;
      conv_params.simd_size = sub_group_size;
    }
    else
    {
      conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS;
    }
    if (dst_depth % 4 == 0 || dst_depth >= 8)
    {
      conv_params.block_size.w = 4;
    }
    else if (dst_depth % 2 == 0 || dst_depth >= 4)
    {
      conv_params.block_size.w = 2;
    }
    else
    {
      conv_params.block_size.w = dst_depth;
    }
    if (src_depth % 2 == 0)
    {
      conv_params.src_depth_loop_size = 2;
    }
    if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
    {
      conv_params.src_depth_loop_size = 4;
    }
  }
  else
  {
    // Unknown vendor: conservative defaults with global-memory weights.
    conv_params.block_size = int4(1, 1, 1, 4);
    work_group_size_ = int3(8, 2, 1);
    work_group_launch_order_ = int3(0, 1, 2);
    conv_params.fixed_work_group_size = false;
    conv_params.src_depth_loop_size = 1;
    conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM;
    if (dst_depth % 4 == 0 || dst_depth >= 8)
    {
      conv_params.block_size.w = 4;
    }
    else if (dst_depth % 2 == 0 || dst_depth >= 4)
    {
      conv_params.block_size.w = 2;
    }
    else
    {
      conv_params.block_size.w = dst_depth;
    }
    if (src_depth % 2 == 0)
    {
      conv_params.src_depth_loop_size = 2;
    }
    if (src_depth % 4 == 0 && conv_params.block_size.w <= 2)
    {
      conv_params.src_depth_loop_size = 4;
    }
  }

  return conv_params;
}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
-                                                     const OperationDef &definition,
-                                                     const Convolution2DAttributes &attr,
-                                                     const BHWC *dst_shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 &&
-                             attr.dilations.w == 1 && attr.padding.prepended.w == 0 &&
-                             attr.padding.appended.w == 0;
-  const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 &&
-                             attr.dilations.h == 1 && attr.padding.prepended.h == 0 &&
-                             attr.padding.appended.h == 0;
-  return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
-                         y_kernel_is_1, false, dst_shape);
-}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
-                                                     const OperationDef &definition,
-                                                     const Convolution3DAttributes &attr,
-                                                     const BHWDC *dst_shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 &&
-                             attr.dilations.w == 1 && attr.padding.prepended.w == 0 &&
-                             attr.padding.appended.w == 0;
-  const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 &&
-                             attr.dilations.h == 1 && attr.padding.prepended.h == 0 &&
-                             attr.padding.appended.h == 0;
-  const bool z_kernel_is_1 = attr.weights.shape.d == 1 && attr.strides.d == 1 &&
-                             attr.dilations.d == 1 && attr.padding.prepended.d == 0 &&
-                             attr.padding.appended.d == 0;
-
-  ConvPowerVR::ConvParams result;
-  BHWC shape;
-  if (dst_shape)
-  {
-    shape.b = dst_shape->b;
-    shape.h = dst_shape->h * dst_shape->d;
-    shape.w = dst_shape->w;
-    shape.c = dst_shape->c;
-    result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
-                             y_kernel_is_1, false, &shape);
-  }
-  else
-  {
-    result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
-                             y_kernel_is_1, false, nullptr);
-  }
-  result.z_kernel_is_1 = z_kernel_is_1;
-  return result;
-}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
-                                                     const OperationDef &definition,
-                                                     const Convolution2DAttributes &attr,
-                                                     const BHWC &weights_shape,
-                                                     const BHWC *dst_shape)
-{
-  const int dst_depth = DivideRoundUp(weights_shape.b, 4);
-  const int src_depth = DivideRoundUp(weights_shape.c, 4);
-  const bool x_kernel_is_1 = weights_shape.w == 1 && attr.strides.w == 1 && attr.dilations.w == 1 &&
-                             attr.padding.prepended.w == 0 && attr.padding.appended.w == 0;
-  const bool y_kernel_is_1 = weights_shape.h == 1 && attr.strides.h == 1 && attr.dilations.h == 1 &&
-                             attr.padding.prepended.h == 0 && attr.padding.appended.h == 0;
-  return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1,
-                         y_kernel_is_1, false, dst_shape);
-}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info,
-                                                     const OperationDef &definition,
-                                                     const FullyConnectedAttributes &attr,
-                                                     const BHWC *dst_shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  ConvPowerVR::ConvParams params =
-    GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, false, dst_shape);
-  work_group_size_.x *= work_group_size_.y;
-  work_group_size_.y = 1;
-  params.block_size.x *= params.block_size.y;
-  params.block_size.y = 1;
-  return params;
-}
-
-ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd(const DeviceInfo &device_info,
-                                                             const OperationDef &definition,
-                                                             const Convolution2DAttributes &attr,
-                                                             const BHWC *dst_shape)
-{
-  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
-  ConvPowerVR::ConvParams params =
-    GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, true, dst_shape);
-  params.block_size.x *= params.block_size.y;
-  params.block_size.y = 1;
-  return params;
-}
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
-                              const Convolution2DAttributes &attr, const BHWC *dst_shape)
-{
-  ConvPowerVR result(definition, attr, device_info, dst_shape);
-  result.GenerateCode(device_info);
-  result.UploadData(attr.weights, attr.bias);
-  return result;
-}
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
-                              const FullyConnectedAttributes &attr, const BHWC *dst_shape)
-{
-  ConvPowerVR result(definition, attr, device_info, dst_shape);
-  result.GenerateCode(device_info);
-  result.UploadData(attr.weights, attr.bias);
-  return result;
-}
-
-ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info,
-                                            const OperationDef &definition,
-                                            const Convolution2DAttributes &attr,
-                                            const BHWC &weights_shape, const BHWC *dst_shape)
-{
-  ConvPowerVR result(definition, attr, weights_shape, device_info, dst_shape);
-  result.GenerateCode(device_info);
-  result.UploadBias(attr.bias);
-  return result;
-}
-
-ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info,
-                                          const OperationDef &definition,
-                                          const Convolution2DAttributes &attr,
-                                          const BHWC *dst_shape)
-{
-  ConvPowerVR result(definition);
-  result.conv_params_ = result.GuessBestParamsWinograd(device_info, definition, attr, dst_shape);
-  result.GenerateCode(device_info);
-  result.UploadDataForWinograd4x4To6x6(attr.weights);
-  return result;
-}
-
-ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition,
-                                const Convolution3DAttributes &attr, const BHWDC *dst_shape)
-{
-  ConvPowerVR result(definition, attr, device_info, dst_shape);
-  result.GenerateCode(device_info);
-  result.UploadWeights(attr.weights);
-  result.UploadBias(attr.bias);
-  return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h
deleted file mode 100644 (file)
index f83f057..0000000
+++ /dev/null
@@ -1,413 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__
-
-#include <cstring>
-#include <vector>
-
-#include "open_cl/Buffer.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Texture2d.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/WinogradUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConvPowerVR : public GPUOperation
-{
-public:
-  ConvPowerVR() = default;
-  void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                                   const KernelInfo &kernel_info,
-                                   std::vector<int3> *work_groups) const override;
-  absl::Status BindArguments(ArgumentsBinder *args) override;
-  int3 GetGridSize() const override;
-
-  ConvWeightsDescription GetConvWeightsDescription() const
-  {
-    ConvWeightsDescription desc;
-    desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4;
-    desc.output_group_size = conv_params_.block_size.w;
-    return desc;
-  }
-
-  // Move only
-  ConvPowerVR(ConvPowerVR &&operation);
-  ConvPowerVR &operator=(ConvPowerVR &&operation);
-  ConvPowerVR(const ConvPowerVR &) = delete;
-  ConvPowerVR &operator=(const ConvPowerVR &) = delete;
-
-private:
-  enum class WeightsUploadType
-  {
-    LOCAL_MEM_ASYNC_SUBGROUP, // we use it for PowerVR with workgroup size = 32
-    LOCAL_MEM_BY_THREADS,
-    GLOBAL_MEM,
-    CONSTANT_MEM,
-    PRIVATE_MEM_SIMD_BROADCAST,
-    TEXTURES_MEM_X4, // 4 textures for weights
-  };
-
-  struct ConvParams
-  {
-    // Usually we use this combinations for CalculationPrecision:
-    // F32: all F32
-    // F16: all F16
-    // F32_F16: all besides accumulator is F16, including weights
-    // But for PowerVR we can achieve better performance in F32_F16 with F32
-    // weights, so for PowerVR in this kernel we have F32 weights for
-    // F32_F16 precision mode
-    DataType weights_data_type; // used for weights and biases
-    int4 block_size;            // WHDS
-    bool fixed_work_group_size;
-    bool linear_spatial; // spatial dimensions are Width/Height/Depth
-    bool different_weights_for_height;
-    int src_depth_loop_size;
-    WeightsUploadType weights_upload_type;
-    bool x_kernel_is_1;
-    bool y_kernel_is_1;
-    bool z_kernel_is_1;
-
-    // used only with PRIVATE_MEM_SIMD_BROADCAST
-    int simd_size = 1;
-
-    bool AreWeightsBuffer() const
-    {
-      return weights_upload_type != WeightsUploadType::TEXTURES_MEM_X4;
-    }
-
-    bool IsPrivateMemBroadcast() const
-    {
-      return weights_upload_type == WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST;
-    }
-  };
-
-  ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
-              const DeviceInfo &device_info, const BHWC *dst_shape = nullptr);
-  ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr,
-              const BHWC &weights_shape, const DeviceInfo &device_info,
-              const BHWC *dst_shape = nullptr);
-  ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr,
-              const DeviceInfo &device_info, const BHWC *dst_shape = nullptr);
-  explicit ConvPowerVR(const OperationDef &definition);
-  ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr,
-              const DeviceInfo &device_info, const BHWDC *dst_shape = nullptr);
-
-  void GenerateCode(const DeviceInfo &device_info);
-
-  template <DataType T>
-  void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases);
-  template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights);
-
-  template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights);
-
-  template <DataType T> void UploadWeights(const InternalTensor<OHWDI, T> &weights);
-
-  template <DataType T> void UploadBias(const InternalTensor<Linear, T> &bias);
-
-  friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info,
-                                       const OperationDef &definition,
-                                       const Convolution2DAttributes &attr, const BHWC *dst_shape);
-
-  friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info,
-                                       const OperationDef &definition,
-                                       const FullyConnectedAttributes &attr, const BHWC *dst_shape);
-
-  friend ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info,
-                                                     const OperationDef &definition,
-                                                     const Convolution2DAttributes &attr,
-                                                     const BHWC &weights_shape,
-                                                     const BHWC *dst_shape);
-
-  friend ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info,
-                                                   const OperationDef &definition,
-                                                   const Convolution2DAttributes &attr,
-                                                   const BHWC *dst_shape);
-
-  friend ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info,
-                                         const OperationDef &definition,
-                                         const Convolution3DAttributes &attr,
-                                         const BHWDC *dst_shape);
-
-  ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
-                             const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr);
-  ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
-                             const Convolution2DAttributes &attr, const BHWC &weights_shape,
-                             const BHWC *dst_shape = nullptr);
-  ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
-                             const FullyConnectedAttributes &attr, const BHWC *dst_shape = nullptr);
-  ConvParams GuessBestParamsWinograd(const DeviceInfo &device_info, const OperationDef &definition,
-                                     const Convolution2DAttributes &attr,
-                                     const BHWC *dst_shape = nullptr);
-  ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
-                             const Convolution3DAttributes &attr, const BHWDC *dst_shape = nullptr);
-  ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition,
-                             int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1,
-                             bool different_weights_for_height, const BHWC *dst_shape = nullptr);
-
-  std::string GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def,
-                           bool stride_correction, const ConvParams &conv_params);
-
-  int4 stride_;
-  int4 padding_;
-  int4 kernel_size_;
-  int4 dilation_;
-  ConvParams conv_params_;
-};
-
-template <DataType T>
-void ConvPowerVR::UploadData(const InternalTensor<OHWI, T> &weights,
-                             const InternalTensor<Linear, T> &biases)
-{
-  UploadWeights(weights);
-  UploadBias(biases);
-}
-
-template <DataType T>
-void ConvPowerVR::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights)
-{
-  InternalTensor<OHWI, T> wino_weights;
-  RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights);
-  UploadWeights(wino_weights);
-  InternalTensor<Linear, DataType::FLOAT32> biases;
-  biases.shape = Linear(weights.shape.o);
-  biases.data.resize(weights.shape.o, 0.0f);
-  UploadBias(biases);
-}
-
-template <DataType T> void ConvPowerVR::UploadBias(const InternalTensor<Linear, T> &bias)
-{
-  BufferDescriptor desc;
-  desc.element_type = conv_params_.weights_data_type;
-  desc.element_size = 4;
-  desc.memory_type =
-    conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM
-      ? MemoryType::CONSTANT
-      : MemoryType::GLOBAL;
-  const int float_size = sizeof(float);
-  // TODO
-  // conv_params_.weights_data_type == DataType::FLOAT32 ? sizeof(float) : sizeof(half);
-  int aligned_channels = AlignByN(bias.shape.v, 4 * conv_params_.block_size.w);
-  desc.size = float_size * aligned_channels;
-  desc.data.resize(desc.size);
-  if (conv_params_.weights_data_type == DataType::FLOAT32)
-  {
-    float *gpu_data = reinterpret_cast<float *>(desc.data.data());
-    for (int i = 0; i < aligned_channels; ++i)
-    {
-      gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f;
-    }
-  }
-  //   else
-  //   {
-  //     half *gpu_data = reinterpret_cast<half *>(desc.data.data());
-  //     for (int i = 0; i < aligned_channels; ++i)
-  //     {
-  //       gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f;
-  //     }
-  //   }
-  args_.AddObject("biases", absl::make_unique<BufferDescriptor>(std::move(desc)));
-}
-
-template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWI, T> &weights)
-{
-  const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), conv_params_.block_size.w);
-  const int src_slices = DivideRoundUp(weights.shape.i, 4);
-
-  const bool f32_weights = conv_params_.weights_data_type == DataType::FLOAT32;
-  const int float4_size = sizeof(float4);
-  // TODO
-  // f32_weights ? sizeof(float4) : sizeof(half4);
-
-  const int elements_count = weights.shape.h * weights.shape.w * src_slices * dst_slices * 4;
-
-  std::vector<uint8_t> data(float4_size * elements_count);
-
-  if (f32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(data.data());
-    if (conv_params_.AreWeightsBuffer())
-    {
-      RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w,
-                                       absl::MakeSpan(ptr, elements_count));
-    }
-    else
-    {
-      RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w,
-                                       absl::MakeSpan(ptr, elements_count));
-    }
-  }
-  //   else
-  //   {
-  //     half4 *ptr = reinterpret_cast<half4 *>(data.data());
-  //     if (conv_params_.AreWeightsBuffer())
-  //     {
-  //       RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w,
-  //                                        absl::MakeSpan(ptr, elements_count));
-  //     }
-  //     else
-  //     {
-  //       RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w,
-  //                                        absl::MakeSpan(ptr, elements_count));
-  //     }
-  //   }
-  if (conv_params_.AreWeightsBuffer())
-  {
-    BufferDescriptor desc;
-    desc.element_type = conv_params_.weights_data_type;
-    desc.element_size = 4;
-    desc.memory_type =
-      conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM
-        ? MemoryType::CONSTANT
-        : MemoryType::GLOBAL;
-    desc.size = float4_size * elements_count;
-    desc.data = std::move(data);
-    args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
-  }
-  else
-  {
-    const int texture_width = dst_slices;
-    const int texture_height = src_slices * weights.shape.h * weights.shape.w;
-    const int sub_size = float4_size * texture_width * texture_height;
-    for (int i = 0; i < 4; ++i)
-    {
-      Texture2DDescriptor desc;
-      desc.element_type = conv_params_.weights_data_type;
-      desc.size = int2(texture_width, texture_height);
-      desc.data.resize(sub_size);
-      std::memcpy(desc.data.data(), data.data() + sub_size * i, sub_size);
-      const std::string name = "weights" + std::to_string(i);
-      args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc)));
-    }
-  }
-}
-
-template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWDI, T> &weights)
-{
-  const int block_size = conv_params_.block_size.w;
-  const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), block_size);
-  const int src_slices = DivideRoundUp(weights.shape.i, 4);
-
-  const int elements_count =
-    weights.shape.d * weights.shape.h * weights.shape.w * src_slices * dst_slices * 4;
-  const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
-
-  const int float4_size = f32_weights ? 16 : 8;
-
-  std::vector<uint8_t> data(float4_size * elements_count);
-
-  if (f32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(data.data());
-    if (conv_params_.AreWeightsBuffer())
-    {
-      RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w,
-                                        absl::MakeSpan(ptr, elements_count));
-    }
-    else
-    {
-      RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w,
-                                        absl::MakeSpan(ptr, elements_count));
-    }
-  }
-  //   else
-  //   {
-  //     half4 *ptr = reinterpret_cast<half4 *>(data.data());
-  //     if (conv_params_.AreWeightsBuffer())
-  //     {
-  //       RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w,
-  //                                         absl::MakeSpan(ptr, elements_count));
-  //     }
-  //     else
-  //     {
-  //       RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w,
-  //                                         absl::MakeSpan(ptr, elements_count));
-  //     }
-  //   }
-
-  if (conv_params_.AreWeightsBuffer())
-  {
-    BufferDescriptor desc;
-    desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.element_size = 4;
-    desc.size = float4_size * elements_count;
-    desc.data = std::move(data);
-    args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
-  }
-  else
-  {
-    const int texture_width = dst_slices;
-    const int texture_height = src_slices * weights.shape.d * weights.shape.h * weights.shape.w;
-    int sub_size = float4_size * texture_width * texture_height;
-    for (int i = 0; i < 4; ++i)
-    {
-      Texture2DDescriptor desc;
-      desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-      desc.size = int2(texture_width, texture_height);
-      desc.data.resize(sub_size);
-      memcpy(desc.data.data(), data.data() + sub_size * i, sub_size);
-      const std::string name = "weights" + std::to_string(i);
-      args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc)));
-    }
-  }
-}
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
-                              const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition,
-                              const FullyConnectedAttributes &attr,
-                              const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info,
-                                            const OperationDef &definition,
-                                            const Convolution2DAttributes &attr,
-                                            const BHWC &weights_shape,
-                                            const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info,
-                                          const OperationDef &definition,
-                                          const Convolution2DAttributes &attr,
-                                          const BHWC *dst_shape = nullptr);
-
-ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition,
-                                const Convolution3DAttributes &attr,
-                                const BHWDC *dst_shape = nullptr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc
deleted file mode 100644 (file)
index 95172bd..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "open_cl/kernels/ConvWeightsConverter.h"
-
-#include <string>
-
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-ConverterToConvWeights::ConverterToConvWeights(const OperationDef &definition,
-                                               const ConvWeightsDescription &conv_weights_desc)
-  : GPUOperation(definition), conv_weights_desc_(conv_weights_desc)
-{
-  code_ = GetConverterToConvWeightsCode(definition_, conv_weights_desc_);
-}
-
-ConverterToConvWeights::ConverterToConvWeights(ConverterToConvWeights &&operation)
-  : GPUOperation(std::move(operation)), conv_weights_desc_(operation.conv_weights_desc_)
-{
-}
-
-ConverterToConvWeights &ConverterToConvWeights::operator=(ConverterToConvWeights &&operation)
-{
-  if (this != &operation)
-  {
-    conv_weights_desc_ = operation.conv_weights_desc_;
-    GPUOperation::operator=(std::move(operation));
-  }
-  return *this;
-}
-
-std::string ConverterToConvWeights::GetConverterToConvWeightsCode(
-  const OperationDef &op_def, const ConvWeightsDescription &conv_weights_desc)
-{
-  AddSrcTensor("src_tensor", op_def.src_tensors[0]);
-  AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
-  args_.AddFloat("mask_x");
-  args_.AddFloat("mask_y");
-  args_.AddFloat("mask_z");
-  args_.AddFloat("mask_w");
-
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int GROUP_SIZE = " + std::to_string(conv_weights_desc.output_group_size) + ";\n";
-  c += "  int O = get_global_id(0) * 4;\n";
-  c += "  int I = get_global_id(1);\n";
-  c += "  int Z = get_global_id(2);\n";
-  c += "  int W = Z % args.src_tensor.Width();\n";
-  c += "  int H = Z / args.src_tensor.Width();\n";
-  c += "  if (O >= args.src_tensor.Batch() || I >= args.src_tensor.Slices() || "
-       "H >= args.src_tensor.Height()) return;\n";
-  c += "  FLT4 v0 = args.src_tensor.Read(W, H, I, O + 0);\n";
-  c += "  FLT4 v1 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  c += "  FLT4 v2 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  c += "  FLT4 v3 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  c += "  if (O + 1 < args.src_tensor.Batch()) {\n";
-  c += "    v1 = args.src_tensor.Read(W, H, I, O + 1);\n";
-  c += "  }\n";
-  c += "  if (O + 2 < args.src_tensor.Batch()) {\n";
-  c += "    v2 = args.src_tensor.Read(W, H, I, O + 2);\n";
-  c += "  }\n";
-  c += "  if (O + 3 < args.src_tensor.Batch()) {\n";
-  c += "    v3 = args.src_tensor.Read(W, H, I, O + 3);\n";
-  c += "  }\n";
-  c += "  if (I == args.src_tensor.Slices() - 1) {\n";
-  c += "    FLT4 mask = (FLT4)(args.mask_x, args.mask_y, args.mask_z, "
-       "args.mask_w);\n";
-  c += "    v0 *= mask;\n";
-  c += "    v1 *= mask;\n";
-  c += "    v2 *= mask;\n";
-  c += "    v3 *= mask;\n";
-  c += "  }\n";
-  c += "  FLT4 r0 = (FLT4)(v0.x, v1.x, v2.x, v3.x);\n";
-  c += "  FLT4 r1 = (FLT4)(v0.y, v1.y, v2.y, v3.y);\n";
-  c += "  FLT4 r2 = (FLT4)(v0.z, v1.z, v2.z, v3.z);\n";
-  c += "  FLT4 r3 = (FLT4)(v0.w, v1.w, v2.w, v3.w);\n";
-  c += "  int d_index = O / (GROUP_SIZE * 4);\n";
-  c += "  int k_index = (O % (GROUP_SIZE * 4)) / 4;\n";
-  c += "  int dst_offset = (((d_index * args.src_tensor.Height() + H) * "
-       "args.src_tensor.Width() + W) * "
-       "args.src_tensor.Slices() + I) * GROUP_SIZE + "
-       "k_index;\n";
-  c += "  int address0 = dst_offset * 4 + 0;\n";
-  c += "  int address1 = dst_offset * 4 + 1;\n";
-  c += "  int address2 = dst_offset * 4 + 2;\n";
-  c += "  int address3 = dst_offset * 4 + 3;\n";
-  c += "  args.dst_tensor.WriteLinear(r0, dst_offset * 4 + 0)\n;";
-  c += "  args.dst_tensor.WriteLinear(r1, dst_offset * 4 + 1)\n;";
-  c += "  args.dst_tensor.WriteLinear(r2, dst_offset * 4 + 2)\n;";
-  c += "  args.dst_tensor.WriteLinear(r3, dst_offset * 4 + 3)\n;";
-  c += "}\n";
-  return c;
-}
-
-absl::Status ConverterToConvWeights::BindArguments(ArgumentsBinder *args)
-{
-  float4 mask = GetMaskForLastPlane(src_[0]->Channels());
-  RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x));
-  RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y));
-  RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z));
-  return args->SetFloat("mask_w", mask.w);
-}
-
-int3 ConverterToConvWeights::GetGridSize() const
-{
-  const int grid_x =
-    DivideRoundUp(AlignByN(src_[0]->Batch(), 4 * conv_weights_desc_.output_group_size), 4);
-  const int grid_y = src_[0]->Slices();
-  const int grid_z = src_[0]->Width() * src_[0]->Height();
-  return int3(grid_x, grid_y, grid_z);
-}
-
-ConverterToConvWeights CreateConverterToConvWeights(const OperationDef &definition,
-                                                    const ConvWeightsDescription &conv_weights_desc)
-{
-  return ConverterToConvWeights(definition, conv_weights_desc);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h
deleted file mode 100644 (file)
index bb68977..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__
-
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Status.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConverterToConvWeights : public GPUOperation
-{
-public:
-  ConverterToConvWeights(const OperationDef &definition,
-                         const ConvWeightsDescription &conv_weights_desc);
-  absl::Status BindArguments(ArgumentsBinder *args) override;
-  int3 GetGridSize() const override;
-
-  // Move only
-  ConverterToConvWeights(ConverterToConvWeights &&operation);
-  ConverterToConvWeights &operator=(ConverterToConvWeights &&operation);
-  ConverterToConvWeights(const ConverterToConvWeights &) = delete;
-  ConverterToConvWeights &operator=(const ConverterToConvWeights &) = delete;
-
-private:
-  std::string GetConverterToConvWeightsCode(const OperationDef &op_def,
-                                            const ConvWeightsDescription &conv_weights_desc);
-
-  ConvWeightsDescription conv_weights_desc_;
-};
-
-// We expect src BHWC tensor and we assume that B is O, H = H, W = W, C is I
-// as dst we expect Tensor with storage type BUFFER and
-// dst.b * dst.h * dst.w * dst.c = AlignByN(src.b, 4) * src.h * src.w
-// AlignByN(src.c, 4)
-ConverterToConvWeights
-CreateConverterToConvWeights(const OperationDef &definition,
-                             const ConvWeightsDescription &conv_weights_desc);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc
deleted file mode 100644 (file)
index cc2bc41..0000000
+++ /dev/null
@@ -1,592 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Converter.h"
-
-#include <algorithm>
-#include <array>
-#include <string>
-
-#include "open_cl/Arguments.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClErrors.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/Precision.h"
-#include "open_cl/InternalTensor.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/TensorTypeUtil.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-class OpenClConverterImpl : public TensorObjectConverter
-{
-public:
-  virtual absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
-                            Environment *environment) = 0;
-
-protected:
-  absl::Status DispatchKernel(cl_mem buffer_mem, Tensor *tensor)
-  {
-    kernel_.ResetBindingCounter();
-    RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem));
-    RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor));
-    RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter()));
-    const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(), tensor->Slices());
-    const int3 work_group_size = {16, 8, 1};
-    const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
-    return queue_->Dispatch(kernel_, work_groups_count, work_group_size);
-  }
-
-  Arguments args_;
-  BHWC shape_;
-  CLKernel kernel_;
-  TensorDescriptor tensor_descriptor_;
-  CLCommandQueue *queue_ = nullptr;
-  const CLContext *context_ = nullptr;
-};
-
-bool IsSupportedDataType(DataType type)
-{
-  return type == DataType::FLOAT16 || type == DataType::FLOAT32;
-}
-
-bool IsBHWCOpenCLBuffer(const ObjectDef &def)
-{
-  return IsSupportedDataType(def.data_type) && def.object_type == ObjectType::OPENCL_BUFFER &&
-         def.data_layout == DataLayout::BHWC;
-}
-
-bool IsOpenCLTensor(const ObjectDef &def)
-{
-  const bool is_buffer_tensor =
-    def.object_type == ObjectType::OPENCL_BUFFER && def.data_layout == DataLayout::DHWC4;
-  const bool is_image2d_tensor =
-    def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::HDWC4;
-  const bool is_image2d_array_tensor =
-    def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::DHWC4;
-  const bool is_single_image_tensor =
-    def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::BHWC;
-  return IsSupportedDataType(def.data_type) && (is_buffer_tensor || is_image2d_tensor ||
-                                                is_image2d_array_tensor || is_single_image_tensor);
-}
-
-absl::Status GetOpenCLMemory(const TensorObject &obj, cl_mem *memory)
-{
-  auto texture = absl::get_if<OpenClTexture>(&obj);
-  auto buffer = absl::get_if<OpenClBuffer>(&obj);
-  if (texture && texture->memobj)
-  {
-    *memory = texture->memobj;
-  }
-  else if (buffer && buffer->memobj)
-  {
-    *memory = buffer->memobj;
-  }
-  else
-  {
-    return absl::InvalidArgumentError("Missing OpenCL object.");
-  }
-  return absl::OkStatus();
-}
-
-// Implements conversion from OpenCL tensor to another OpenCL tensor.
-class TensorToTensorConverter : public OpenClConverterImpl
-{
-public:
-  static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
-  {
-    return IsOpenCLTensor(input) && IsOpenCLTensor(output);
-  }
-
-  absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
-                    Environment *environment) final
-  {
-    src_tensor_descriptor_.layout = Layout::BHWC;
-    src_tensor_descriptor_.storage_type =
-      ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout);
-    src_tensor_descriptor_.data_type = input_def.object_def.data_type;
-    args_.AddObjectRef("src_tensor", AccessType::READ,
-                       absl::make_unique<TensorDescriptor>(src_tensor_descriptor_));
-
-    dst_tensor_descriptor_.layout = Layout::BHWC;
-    dst_tensor_descriptor_.storage_type =
-      ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout);
-    dst_tensor_descriptor_.data_type = output_def.object_def.data_type;
-    args_.AddObjectRef("dst_tensor", AccessType::WRITE,
-                       absl::make_unique<TensorDescriptor>(dst_tensor_descriptor_));
-
-    const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 ||
-                                   output_def.object_def.data_type == DataType::FLOAT16;
-    const std::string out_data_type = ToCLDataType(output_def.object_def.data_type);
-    std::string shader_src;
-    if (need_fp16_support)
-    {
-      shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
-    }
-    shader_src +=
-      R"(__kernel void tensor_to_tensor($0) {
-  int linear_id = get_global_id(0);
-  int x = linear_id / args.dst_tensor.Batch();
-  int b = linear_id % args.dst_tensor.Batch();
-  int y = get_global_id(1);
-  int d = get_global_id(2);
-  if (x >= args.dst_tensor.Width() || y >= args.dst_tensor.Height() || d >= args.dst_tensor.Slices()) return;
-)";
-    shader_src +=
-      "  " + out_data_type + "4 input = args.src_tensor.Read<" + out_data_type + ">(x, y, d, b);\n";
-    shader_src += "  args.dst_tensor.Write(input, x, y, d, b);\n}";
-    queue_ = environment->queue();
-    context_ = &environment->context();
-    shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w,
-                  input_def.dimensions.c);
-    RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
-    return environment->program_cache()->GetOrCreateCLKernel(
-      shader_src, "tensor_to_tensor", environment->context(), environment->device(), &kernel_);
-  }
-
-  absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
-  {
-    cl_mem in_memory = nullptr;
-    RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory));
-    cl_mem out_memory = nullptr;
-    RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory));
-
-    Tensor src_tensor;
-    RETURN_IF_ERROR(
-      CreateSharedTensor(*context_, in_memory, shape_, src_tensor_descriptor_, &src_tensor));
-    Tensor dst_tensor;
-    RETURN_IF_ERROR(
-      CreateSharedTensor(*context_, out_memory, shape_, dst_tensor_descriptor_, &dst_tensor));
-
-    RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor));
-    RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor));
-
-    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
-    const int3 grid =
-      int3(dst_tensor.Width() * dst_tensor.Batch(), dst_tensor.Height(), dst_tensor.Slices());
-    const int3 work_group_size = {16, 8, 1};
-    const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size);
-    return queue_->Dispatch(kernel_, work_groups_count, work_group_size);
-  }
-
-private:
-  TensorDescriptor src_tensor_descriptor_;
-  TensorDescriptor dst_tensor_descriptor_;
-};
-
-// Implements conversion from OpenCL-specific tensor layout to BHWC OpenCL
-// buffer.
-class TensorToBHWCBufferConverter : public OpenClConverterImpl
-{
-public:
-  static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
-  {
-    return IsOpenCLTensor(input) && IsBHWCOpenCLBuffer(output);
-  }
-
-  absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
-                    Environment *environment) final
-  {
-    TensorStorageType src_tensor_type =
-      ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout);
-    tensor_descriptor_.layout = Layout::BHWC;
-    tensor_descriptor_.storage_type = src_tensor_type;
-    tensor_descriptor_.data_type = input_def.object_def.data_type;
-    args_.AddObjectRef("tensor", AccessType::READ,
-                       absl::make_unique<TensorDescriptor>(tensor_descriptor_));
-
-    const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 ||
-                                   output_def.object_def.data_type == DataType::FLOAT16;
-    std::string shader_src;
-    if (need_fp16_support)
-    {
-      shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
-    }
-    const std::string out_data_type = ToCLDataType(output_def.object_def.data_type);
-    shader_src += "__kernel void tensor_to_bhwc(";
-    shader_src += "__global " + out_data_type + "* dst, $0) {\n";
-    shader_src += R"(  int linear_id = get_global_id(0);
-  int x = linear_id / args.tensor.Batch();
-  int b = linear_id % args.tensor.Batch();
-  int y = get_global_id(1);
-  int d = get_global_id(2);
-  if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return;
-)";
-    shader_src +=
-      "  " + out_data_type + "4 input = args.tensor.Read<" + out_data_type + ">(x, y, d, b);\n";
-    shader_src += R"(  int c = d * 4;
-  int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c;
-
-  dst[index] = input.x;
-  if (c + 1 < args.tensor.Channels()) {
-    dst[index + 1] = input.y;
-  }
-  if (c + 2 < args.tensor.Channels()) {
-    dst[index + 2] = input.z;
-  }
-  if (c + 3 < args.tensor.Channels()) {
-    dst[index + 3] = input.w;
-  }
-})";
-    queue_ = environment->queue();
-    context_ = &environment->context();
-    shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w,
-                  input_def.dimensions.c);
-    RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
-    return environment->program_cache()->GetOrCreateCLKernel(
-      shader_src, "tensor_to_bhwc", environment->context(), environment->device(), &kernel_);
-  }
-
-  absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
-  {
-    auto output = absl::get_if<OpenClBuffer>(&output_obj);
-    if (!output || !output->memobj)
-    {
-      return absl::InvalidArgumentError("Missing output in tensor_to_bhwc converter");
-    }
-
-    cl_mem in_memory = nullptr;
-    RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory));
-    Tensor tensor;
-    RETURN_IF_ERROR(CreateSharedTensor(*context_, in_memory, shape_, tensor_descriptor_, &tensor));
-    return DispatchKernel(output->memobj, &tensor);
-  }
-};
-
-// Implements conversion from BHWC OpenCL buffer to OpenCL-specific tensor
-// layout.
-class BHWCBufferToTensorConverter : public OpenClConverterImpl
-{
-public:
-  static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
-  {
-    return IsBHWCOpenCLBuffer(input) && IsOpenCLTensor(output);
-  }
-
-  std::pair<std::string, std::string> GetFromBhwcKernel(const TensorObjectDef &input_def,
-                                                        const TensorObjectDef &) const
-  {
-    return std::make_pair("__global " + ToCLDataType(input_def.object_def.data_type) + "* src",
-                          R"(int c = d * 4;
-  int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c;
-  result.x = src[index];
-  result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1;
-  result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2;
-  result.w = c + 3 < args.tensor.Channels() ? src[index + 3] : 3;
-)");
-  }
-
-  absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
-                    Environment *environment) final
-  {
-    auto params_kernel = GetFromBhwcKernel(input_def, output_def);
-
-    TensorStorageType dst_tensor_type =
-      ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout);
-    tensor_descriptor_.layout = Layout::BHWC;
-    tensor_descriptor_.storage_type = dst_tensor_type;
-    tensor_descriptor_.data_type = output_def.object_def.data_type;
-    args_.AddObjectRef("tensor", AccessType::WRITE,
-                       absl::make_unique<TensorDescriptor>(tensor_descriptor_));
-
-    const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 ||
-                                   output_def.object_def.data_type == DataType::FLOAT16;
-    std::string shader_src;
-    if (need_fp16_support)
-    {
-      shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
-    }
-    const std::string in_data_type = ToCLDataType(input_def.object_def.data_type);
-    const std::string out_data_type = ToCLDataType(output_def.object_def.data_type);
-    shader_src += "__kernel void bhwc_to_tensor(";
-    shader_src += "__global " + in_data_type + "* src, $0) {\n";
-
-    shader_src += R"(  int linear_id = get_global_id(0);
-  int x = linear_id / args.tensor.Batch();
-  int b = linear_id % args.tensor.Batch();
-  int y = get_global_id(1);
-  int d = get_global_id(2);
-
-  if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return;
-)";
-    shader_src += "  " + out_data_type + "4 result;\n";
-    shader_src += R"(  int c = d * 4;
-  int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c;
-  result.x = src[index];
-  result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1;
-  result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2;
-  result.w = c + 3 < args.tensor.Channels() ? src[index + 3] : 3;
-)";
-    shader_src += "  args.tensor.Write(result, x, y, d, b);\n}";
-    queue_ = environment->queue();
-    context_ = &environment->context();
-    shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w,
-                  output_def.dimensions.c);
-    RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src));
-    return environment->program_cache()->GetOrCreateCLKernel(
-      shader_src, "bhwc_to_tensor", environment->context(), environment->device(), &kernel_);
-  }
-
-  absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
-  {
-    auto input = absl::get_if<OpenClBuffer>(&input_obj);
-    if (!input || !input->memobj)
-    {
-      return absl::InvalidArgumentError("Missing input in bhwc_to_tensor converter");
-    }
-    cl_mem out_memory = nullptr;
-    RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory));
-    Tensor tensor;
-    RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_, tensor_descriptor_, &tensor));
-    return DispatchKernel(input->memobj, &tensor);
-  }
-};
-
-std::array<size_t, 3> CalculateTextureRegion(const TensorObjectDef &def)
-{
-  const auto &dims = def.dimensions;
-  std::array<size_t, 3> region = {0, 0, 1};
-  switch (ToTensorStorageType(def.object_def.object_type, def.object_def.data_layout))
-  {
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-      region[0] = static_cast<size_t>(dims.w * dims.b);
-      region[1] = static_cast<size_t>(dims.h);
-      break;
-    case TensorStorageType::TEXTURE_2D:
-      region[0] = static_cast<size_t>(dims.w * dims.b);
-      region[1] = static_cast<size_t>(dims.h * dims.d());
-      break;
-    case TensorStorageType::TEXTURE_ARRAY:
-      region[0] = static_cast<size_t>(dims.w * dims.b);
-      region[1] = static_cast<size_t>(dims.h);
-      region[2] = static_cast<size_t>(dims.d());
-      break;
-    default:
-      break;
-  }
-  return region;
-}
-
-bool IsOpenClTextureOrBuffer(ObjectType type)
-{
-  return type == ObjectType::OPENCL_BUFFER || type == ObjectType::OPENCL_TEXTURE;
-}
-
-// Copies data from one object of the same type and layout to another object.
-class TrivialCopier : public OpenClConverterImpl
-{
-public:
-  static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
-  {
-    return IsOpenClTextureOrBuffer(input.object_type) && input.data_type == output.data_type &&
-           input.object_type == output.object_type && input.data_layout == output.data_layout;
-  }
-
-  absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
-                    Environment *environment) final
-  {
-    shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w,
-                  input_def.dimensions.c);
-    data_type_ = input_def.object_def.data_type;
-    queue_ = environment->queue();
-    region_ = CalculateTextureRegion(output_def);
-    return absl::OkStatus();
-  }
-
-  absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
-  {
-    auto texture_input = absl::get_if<OpenClTexture>(&input_obj);
-    auto texture_output = absl::get_if<OpenClTexture>(&output_obj);
-    if (texture_input && texture_output)
-    {
-      return Copy(*texture_input, *texture_output);
-    }
-    auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj);
-    auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj);
-    if (buffer_input && buffer_output)
-    {
-      return Copy(*buffer_input, *buffer_output);
-    }
-    return absl::InternalError("Unexpected object");
-  }
-
-  absl::Status Copy(const OpenClBuffer &input, const OpenClBuffer &output)
-  {
-    if (input.memobj == output.memobj)
-    {
-      return absl::OkStatus();
-    }
-    return GetOpenCLError(clEnqueueCopyBuffer(queue_->queue(), input.memobj, output.memobj, 0, 0,
-                                              SizeOf(data_type_) * shape_.w * shape_.h *
-                                                AlignByN(shape_.c, 4) * shape_.b,
-                                              0, nullptr, nullptr));
-  }
-
-  absl::Status Copy(const OpenClTexture &input, const OpenClTexture &output)
-  {
-    if (input.memobj == output.memobj)
-    {
-      return absl::OkStatus();
-    }
-    size_t origin[3] = {0, 0, 0};
-    return GetOpenCLError(clEnqueueCopyImage(queue_->queue(), input.memobj, output.memobj, origin,
-                                             origin, region_.data(), 0, nullptr, nullptr));
-  }
-
-private:
-  DataType data_type_ = DataType::UNKNOWN;
-  std::array<size_t, 3> region_;
-};
-
-// Copies data from/to CPU into a tensor.
-class CpuCopier : public OpenClConverterImpl
-{
-public:
-  static bool IsSupported(const ObjectDef &input, const ObjectDef &output)
-  {
-    return input.data_type == output.data_type && input.data_layout == output.data_layout &&
-           ((input.object_type == ObjectType::CPU_MEMORY &&
-             IsOpenClTextureOrBuffer(output.object_type)) ||
-            (output.object_type == ObjectType::CPU_MEMORY &&
-             IsOpenClTextureOrBuffer(input.object_type)));
-  }
-
-  absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def,
-                    Environment *environment) final
-  {
-
-    region_ = CalculateTextureRegion(
-      input_def.object_def.object_type == ObjectType::CPU_MEMORY ? output_def : input_def);
-    queue_ = environment->queue();
-    return absl::OkStatus();
-  }
-
-  absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override
-  {
-    auto cpu_input = absl::get_if<CpuMemory>(&input_obj);
-    auto cpu_output = absl::get_if<CpuMemory>(&output_obj);
-
-    if (cpu_input)
-    {
-      auto texture_output = absl::get_if<OpenClTexture>(&output_obj);
-      if (texture_output)
-      {
-        return queue_->EnqueueWriteImage(texture_output->memobj,
-                                         int3(region_[0], region_[1], region_[2]), cpu_input->data);
-      }
-      auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj);
-      if (buffer_output)
-      {
-        return queue_->EnqueueWriteBuffer(buffer_output->memobj, cpu_input->size_bytes,
-                                          cpu_input->data);
-      }
-    }
-    else if (cpu_output)
-    {
-      auto texture_input = absl::get_if<OpenClTexture>(&input_obj);
-      if (texture_input)
-      {
-        return queue_->EnqueueReadImage(texture_input->memobj,
-                                        int3(region_[0], region_[1], region_[2]), cpu_output->data);
-      }
-      auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj);
-      if (buffer_input)
-      {
-        return queue_->EnqueueReadBuffer(buffer_input->memobj, cpu_output->size_bytes,
-                                         cpu_output->data);
-      }
-    }
-    return absl::InternalError("Unexpected object");
-  }
-
-private:
-  std::array<size_t, 3> region_;
-};
-
-class OpenClTensorConverterBuilder : public TensorObjectConverterBuilder
-{
-public:
-  explicit OpenClTensorConverterBuilder(Environment *environment) : environment_(environment) {}
-
-  bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const final
-  {
-    const auto &input_def = input.object_def;
-    const auto &output_def = output.object_def;
-    return input.dimensions == output.dimensions &&
-           (TrivialCopier::IsSupported(input_def, output_def) ||
-            TensorToTensorConverter::IsSupported(input_def, output_def) ||
-            CpuCopier::IsSupported(input_def, output_def) ||
-            TensorToBHWCBufferConverter::IsSupported(input_def, output_def) ||
-            BHWCBufferToTensorConverter::IsSupported(input_def, output_def));
-  }
-
-  absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output,
-                             std::unique_ptr<TensorObjectConverter> *converter) final
-  {
-    std::unique_ptr<OpenClConverterImpl> impl;
-    const auto &input_def = input.object_def;
-    const auto &output_def = output.object_def;
-    if (TrivialCopier::IsSupported(input_def, output_def))
-    {
-      impl = absl::make_unique<TrivialCopier>();
-    }
-    else if (TensorToTensorConverter::IsSupported(input_def, output_def))
-    {
-      impl = absl::make_unique<TensorToTensorConverter>();
-    }
-    else if (CpuCopier::IsSupported(input_def, output_def))
-    {
-      impl = absl::make_unique<CpuCopier>();
-    }
-    else if (TensorToBHWCBufferConverter::IsSupported(input_def, output_def))
-    {
-      impl = absl::make_unique<TensorToBHWCBufferConverter>();
-    }
-    else if (BHWCBufferToTensorConverter::IsSupported(input_def, output_def))
-    {
-      impl = absl::make_unique<BHWCBufferToTensorConverter>();
-    }
-    else
-    {
-      return absl::UnimplementedError("Unsupported conversion");
-    }
-    RETURN_IF_ERROR(impl->Init(input, output, environment_));
-    *converter = std::move(impl);
-    return absl::OkStatus();
-  }
-
-  Environment *environment_;
-};
-
-} // namespace
-
-std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment)
-{
-  return absl::make_unique<OpenClTensorConverterBuilder>(environment);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h
deleted file mode 100644 (file)
index d69ec85..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__
-
-#include <memory>
-
-#include "open_cl/Environment.h"
-#include "open_cl/Spi.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-// Supports conversions from BHWC to internal OpenCL tensor representation and
-// back. Also supports F16/F32.
-std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc
deleted file mode 100644 (file)
index e409fef..0000000
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthwiseConv.h"
-
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/LinearStorage.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-bool IsSpecializedCase(int channel_multiplier)
-{
-  return channel_multiplier == 1 || channel_multiplier == 2 || channel_multiplier == 4;
-}
-
-std::string GetSrcValue(int channel_multiplier, const std::string coords)
-{
-  std::string c;
-  if (channel_multiplier == 1)
-  {
-    c += "      FLT4 src_final = args.src_tensor.Read(" + coords + ", S);\n";
-  }
-  else if (channel_multiplier == 2)
-  {
-    c += "      int s_layer = S / 2;\n";
-    c += "      FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
-    c += "      FLT2 t0 = S % 2 == 0 ? src.xy : src.zw;\n";
-    c += "      FLT4 src_final = (FLT4)(t0.x, t0.x, t0.y, t0.y);\n";
-  }
-  else if (channel_multiplier == 4)
-  {
-    c += "      int s_layer = S / 4;\n";
-    c += "      FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
-    c += "      FLT t0 = src.x;\n";
-    c += "      int reminder = S % 4;\n";
-    c += "      if (reminder == 1) t0 = src.y;\n";
-    c += "      if (reminder == 2) t0 = src.z;\n";
-    c += "      if (reminder == 3) t0 = src.w;\n";
-    c += "      FLT4 src_final = (FLT4)(t0, t0, t0, t0);\n";
-  }
-  else
-  {
-    c += "      int s_layer = S / args.ch_multiplier;\n";
-    c += "      FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
-    c += "      int s_offset = (S % args.ch_multiplier) * 4;\n";
-    c += "      FLT4 src_final;\n";
-    c += "      FLT temp_arr[4] = {src.x, src.y, src.z, src.w};\n";
-    c += "      src_final.x = temp_arr[(s_offset + 0) / args.ch_multiplier];\n";
-    c += "      src_final.y = temp_arr[(s_offset + 1) / args.ch_multiplier];\n";
-    c += "      src_final.z = temp_arr[(s_offset + 2) / args.ch_multiplier];\n";
-    c += "      src_final.w = temp_arr[(s_offset + 3) / args.ch_multiplier];\n";
-  }
-
-  return c;
-}
-
-std::string GenerateDepthwiseConvolutionCode(const OperationDef &op_def, bool stride_correction,
-                                             int channel_multiplier, bool weights_are_buffer,
-                                             bool dynamic_weights, GPUOperation *op)
-{
-  auto src_desc = op_def.src_tensors[0];
-  src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddSrcTensor("src_tensor", src_desc);
-  if (dynamic_weights)
-  {
-    op->AddSrcTensor("weights", op_def.src_tensors[1]);
-  }
-
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddDstTensor("dst_tensor", dst_desc);
-
-  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
-
-  std::string c = GetCommonDefines(op_def.precision);
-
-  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
-                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  int linear_id_1 = get_global_id(1);\n";
-    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
-    c += "  int Z = linear_id_1 % args.dst_tensor.Depth();\n";
-  }
-  else
-  {
-    c += "  int Y = get_global_id(1);\n";
-  }
-  c += "  int S = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "S >= args.dst_tensor.Slices()) { \n";
-  c += "    return; \n";
-  c += "  } \n";
-  c += "  ACCUM_FLT4 r = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  if (stride_correction)
-  {
-    c += "  int x_offseted = " +
-         GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
-         ";\n";
-  }
-  else
-  {
-    if (op_def.IsBatchSupported())
-    {
-      c += "  int x_offseted = X * args.stride_x + args.padding_x * "
-           "args.src_tensor.Batch();\n";
-    }
-    else
-    {
-      c += "  int x_offseted = X * args.stride_x + args.padding_x;\n";
-    }
-  }
-  c += "  int y_offseted = Y * args.stride_y + args.padding_y;\n";
-  if (!dynamic_weights)
-  {
-    std::string weights_offset = "args.kernel_size_x * args.kernel_size_y";
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      c += "  int z_offseted = Z * args.stride_z + args.padding_z;\n";
-      weights_offset += " * args.kernel_size_z";
-    }
-    if (weights_are_buffer)
-    {
-      c += "  int fx_c = S * " + weights_offset + ";\n";
-    }
-    else
-    {
-      c += "  int fx_c = 0;\n";
-    }
-  }
-  std::string kernel_size_x = dynamic_weights ? "args.weights.Width()" : "args.kernel_size_x";
-  std::string kernel_size_y = dynamic_weights ? "args.weights.Height()" : "args.kernel_size_y";
-  std::string kernel_size_z = dynamic_weights ? "args.weights.Depth()" : "args.kernel_size_z";
-
-  std::string flat_coords = "x_c, y_c";
-  if (manual_clamp)
-  {
-    std::string check = "!outside_x && !outside_y";
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      check += " && !outside_z";
-      flat_coords += ", z_c";
-      c += "  for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n";
-      c += "    int z_c = z_offseted + kz * args.dilation_z;\n";
-      c += "    bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n";
-    }
-    c += "  for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n";
-    c += "    int y_c = y_offseted + ky * args.dilation_y;\n";
-    c += "    bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n";
-    c += "    for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n";
-    const std::string dilation_x =
-      op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x";
-    c += "      int x_c = x_offseted + kx * " + dilation_x + ";\n";
-    c += "      bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n";
-    c += "      if (" + check + ") {\n";
-    if (dynamic_weights)
-    {
-      c += "        FLT4 f = args.weights.Read(kx, ky, S);\n";
-    }
-    else
-    {
-      if (weights_are_buffer)
-      {
-        c += "        FLT4 f = args.weights.Read(fx_c);\n";
-      }
-      else
-      {
-        c += "        FLT4 f = args.weights.Read(fx_c, S);\n";
-      }
-    }
-    c += GetSrcValue(channel_multiplier, flat_coords);
-    c += "        r += TO_ACCUM_TYPE(src_final * f);\n";
-    c += "      };\n";
-    if (!dynamic_weights)
-    {
-      c += "      fx_c++;\n";
-    }
-    c += "    }\n";
-    c += "  }\n";
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      c += "  }\n";
-    }
-  }
-  else
-  { // Texture types with ZERO clamping
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      flat_coords += ", z_c";
-      c += "  for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n";
-      c += "    int z_c = z_offseted + kz * args.dilation_z;\n";
-      if (src_tensor_type != TensorStorageType::TEXTURE_3D)
-      { // Only TEXTURE_3D supports clamping
-        // in DEPTH dimension
-        c += "    if (z_c < 0 || z_c >= args.src_tensor.Depth()) {\n";
-        c += "      fx_c += args.kernel_size_y * args.kernel_size_x;\n";
-        c += "      continue;\n";
-        c += "    }\n";
-      }
-    }
-    c += "  for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n";
-    c += "    int y_c = y_offseted + ky * args.dilation_y;\n";
-    c += "    for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n";
-    const std::string dilation_x =
-      op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x";
-    c += "      int x_c = x_offseted + kx * " + dilation_x + ";\n";
-    c += GetSrcValue(channel_multiplier, flat_coords);
-    if (dynamic_weights)
-    {
-      c += "      FLT4 f = args.weights.Read(kx, ky, S);\n";
-    }
-    else
-    {
-      if (weights_are_buffer)
-      {
-        c += "      FLT4 f = args.weights.Read(fx_c);\n";
-      }
-      else
-      {
-        c += "      FLT4 f = args.weights.Read(fx_c, S);\n";
-      }
-      c += "      fx_c++;\n";
-    }
-    c += "      r += TO_ACCUM_TYPE(src_final * f);\n";
-    c += "    }\n";
-    c += "  }\n";
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      c += "  }\n";
-    }
-  }
-  c += "  FLT4 res0 = TO_FLT4(r) + args.biases.Read(S);\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  args.dst_tensor.Write(res0, X, Y, Z, S);\n";
-  }
-  else
-  {
-    c += "  args.dst_tensor.Write(res0, X, Y, S);\n";
-  }
-  c += "}\n";
-
-  return c;
-}
-} // namespace
-
-GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info,
-                                          const OperationDef &definition,
-                                          const DepthwiseConvolution2DAttributes &attr)
-{
-  bool weights_are_buffer = device_info.IsMali();
-  GPUOperation op(definition);
-  op.args_.AddInt("kernel_size_x", attr.weights.shape.w);
-  op.args_.AddInt("stride_x", attr.strides.w);
-  op.args_.AddInt("padding_x", -attr.padding.prepended.w);
-  op.args_.AddInt("dilation_x", attr.dilations.w);
-  op.args_.AddInt("kernel_size_y", attr.weights.shape.h);
-  op.args_.AddInt("stride_y", attr.strides.h);
-  op.args_.AddInt("padding_y", -attr.padding.prepended.h);
-  op.args_.AddInt("dilation_y", attr.dilations.h);
-  if (!IsSpecializedCase(attr.weights.shape.o))
-  {
-    op.args_.AddInt("ch_multiplier", attr.weights.shape.o);
-  }
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o,
-                                              weights_are_buffer, false, &op);
-  UploadWeightsForDWConv2D(attr.weights, weights_are_buffer, definition.precision, &op);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-
-  TensorLinearDescriptor desc;
-  desc.storage_type =
-    weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D;
-  desc.element_type = definition.GetDataType();
-  desc.UploadLinearData(attr.bias);
-  op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return op;
-}
-
-GPUOperation
-CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info,
-                                           const OperationDef &definition,
-                                           const DepthwiseConvolution2DAttributes &attr)
-{
-  GPUOperation op(definition);
-  op.args_.AddInt("stride_x", attr.strides.w);
-  op.args_.AddInt("padding_x", -attr.padding.prepended.w);
-  op.args_.AddInt("dilation_x", attr.dilations.w);
-  op.args_.AddInt("stride_y", attr.strides.h);
-  op.args_.AddInt("padding_y", -attr.padding.prepended.h);
-  op.args_.AddInt("dilation_y", attr.dilations.h);
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, 1, false, true, &op);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-
-  TensorLinearDescriptor desc;
-  desc.storage_type =
-    device_info.IsMali() ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D;
-  desc.element_type = definition.GetDataType();
-  desc.UploadLinearData(attr.bias);
-  op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return op;
-}
-
-GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info,
-                                          const OperationDef &definition,
-                                          const DepthwiseConvolution3DAttributes &attr)
-{
-  bool weights_are_buffer = device_info.IsMali();
-  GPUOperation op(definition);
-  op.args_.AddInt("kernel_size_x", attr.weights.shape.w);
-  op.args_.AddInt("stride_x", attr.strides.w);
-  op.args_.AddInt("padding_x", -attr.padding.prepended.w);
-  op.args_.AddInt("dilation_x", attr.dilations.w);
-  op.args_.AddInt("kernel_size_y", attr.weights.shape.h);
-  op.args_.AddInt("stride_y", attr.strides.h);
-  op.args_.AddInt("padding_y", -attr.padding.prepended.h);
-  op.args_.AddInt("dilation_y", attr.dilations.h);
-  op.args_.AddInt("kernel_size_z", attr.weights.shape.d);
-  op.args_.AddInt("stride_z", attr.strides.d);
-  op.args_.AddInt("padding_z", -attr.padding.prepended.d);
-  op.args_.AddInt("dilation_z", attr.dilations.d);
-  if (!IsSpecializedCase(attr.weights.shape.o))
-  {
-    op.args_.AddInt("ch_multiplier", attr.weights.shape.o);
-  }
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o,
-                                              weights_are_buffer, false, &op);
-  UploadWeightsForDWConv3D(attr.weights, weights_are_buffer, definition.precision, &op);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-
-  TensorLinearDescriptor desc;
-  desc.storage_type =
-    weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D;
-  desc.element_type = definition.GetDataType();
-  desc.UploadLinearData(attr.bias);
-  op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h
deleted file mode 100644 (file)
index cbadd9f..0000000
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__
-
-#include <vector>
-
-#include "open_cl/Buffer.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Texture2d.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-template <DataType S, typename T>
-void RearrangeWeightsForDWConv2D(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst)
-{
-  const int dst_channels = weights.shape.i * weights.shape.o;
-  const int dst_depth = DivideRoundUp(dst_channels, 4);
-  const int kernel_x = weights.shape.w;
-  const int kernel_y = weights.shape.h;
-
-  int counter = 0;
-  for (int d = 0; d < dst_depth; ++d)
-  {
-    for (int y = 0; y < kernel_y; ++y)
-    {
-      for (int x = 0; x < kernel_x; ++x)
-      {
-        T filter_val;
-        for (int i = 0; i < 4; ++i)
-        {
-          const int d_ch = d * 4 + i;
-          if (d_ch < dst_channels)
-          {
-            const int f_index =
-              weights.shape.LinearIndex({d_ch % weights.shape.o, y, x, d_ch / weights.shape.o});
-            filter_val[i] = weights.data[f_index];
-          }
-          else
-          {
-            filter_val[i] = 0.0f;
-          }
-        }
-        dst[counter++] = filter_val;
-      }
-    }
-  }
-}
-
-template <DataType T>
-void UploadWeightsForDWConv2D(const InternalTensor<OHWI, T> &weights, bool weights_are_buffer,
-                              CalculationsPrecision precision, GPUOperation *op)
-{
-  const int dst_channels = weights.shape.i * weights.shape.o;
-  const int dst_slices = DivideRoundUp(dst_channels, 4);
-  const int kernel_x = weights.shape.w;
-  const int kernel_y = weights.shape.h;
-
-  const int elements_count = kernel_x * kernel_y * dst_slices;
-
-  const bool fp32_weights = precision == CalculationsPrecision::F32;
-  const int float4_size = fp32_weights ? 16 : 8;
-
-  std::vector<uint8_t> data(float4_size * elements_count);
-
-  if (fp32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(data.data());
-    RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
-  }
-  // TODO
-  // It doesn't support F16 yet. I will try to add it later.
-  //
-  // else {
-  //   half4* ptr = reinterpret_cast<half4*>(data.data());
-  //   RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count));
-  // }
-
-  if (weights_are_buffer)
-  {
-    BufferDescriptor desc;
-    desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.element_size = 4;
-    desc.size = float4_size * elements_count;
-    desc.data = std::move(data);
-    op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc));
-  }
-  else
-  {
-    Texture2DDescriptor desc;
-    desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.size = int2(kernel_x * kernel_y, dst_slices);
-    desc.data = std::move(data);
-    op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(desc));
-  }
-}
-
-template <DataType S, typename T>
-void RearrangeWeightsForDWConv3D(const InternalTensor<OHWDI, S> &weights, absl::Span<T> dst)
-{
-  const int dst_channels = weights.shape.i * weights.shape.o;
-  const int dst_slices = DivideRoundUp(dst_channels, 4);
-  const int kernel_x = weights.shape.w;
-  const int kernel_y = weights.shape.h;
-  const int kernel_z = weights.shape.d;
-
-  int counter = 0;
-  for (int d = 0; d < dst_slices; ++d)
-  {
-    for (int z = 0; z < kernel_z; ++z)
-    {
-      for (int y = 0; y < kernel_y; ++y)
-      {
-        for (int x = 0; x < kernel_x; ++x)
-        {
-          T filter_val;
-          for (int i = 0; i < 4; ++i)
-          {
-            const int d_ch = d * 4 + i;
-            if (d_ch < dst_channels)
-            {
-              const int f_index = weights.shape.LinearIndex(
-                {d_ch % weights.shape.o, y, x, z, d_ch / weights.shape.o});
-              filter_val[i] = weights.data[f_index];
-            }
-            else
-            {
-              filter_val[i] = 0.0f;
-            }
-          }
-          dst[counter++] = filter_val;
-        }
-      }
-    }
-  }
-}
-
-template <DataType T>
-void UploadWeightsForDWConv3D(const InternalTensor<OHWDI, T> &weights, bool weights_are_buffer,
-                              CalculationsPrecision precision, GPUOperation *op)
-{
-  const int dst_channels = weights.shape.i * weights.shape.o;
-  const int dst_slices = DivideRoundUp(dst_channels, 4);
-  const int kernel_x = weights.shape.w;
-  const int kernel_y = weights.shape.h;
-  const int kernel_z = weights.shape.d;
-
-  const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices;
-
-  const bool fp32_weights = precision == CalculationsPrecision::F32;
-  const int float4_size = fp32_weights ? 16 : 8;
-
-  std::vector<uint8_t> data(float4_size * elements_count);
-
-  if (fp32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(data.data());
-    RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
-  }
-  // TODO
-  // It doesn't support F16 yet. I will try to add it later.
-  //
-  // else {
-  //   half4* ptr = reinterpret_cast<half4*>(data.data());
-  //   RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count));
-  // }
-
-  if (weights_are_buffer)
-  {
-    BufferDescriptor desc;
-    desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.element_size = 4;
-    desc.size = float4_size * elements_count;
-    desc.data = std::move(data);
-    op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
-  }
-  else
-  {
-    Texture2DDescriptor desc;
-    desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices);
-    desc.data = std::move(data);
-    op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc)));
-  }
-}
-
-GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info,
-                                          const OperationDef &definition,
-                                          const DepthwiseConvolution2DAttributes &attr);
-
-GPUOperation
-CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info,
-                                           const OperationDef &definition,
-                                           const DepthwiseConvolution2DAttributes &attr);
-
-GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info,
-                                          const OperationDef &definition,
-                                          const DepthwiseConvolution3DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc
deleted file mode 100644 (file)
index 89a14f1..0000000
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DepthwiseConv3x3.h"
-
-#include <string>
-#include <utility>
-
-#include "open_cl/kernels/Util.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/Precision.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer,
-                                   bool local_mem_uploads, const DeviceInfo &device_info)
-  : GPUOperation(definition), local_mem_uploads_(local_mem_uploads)
-{
-  work_group_size_ = int3(8, 4, 1);
-  code_ = GenerateDepthwiseConvCode(definition_, weights_are_buffer, local_mem_uploads_);
-
-  if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR())
-  {
-    compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
-  }
-}
-
-DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3 &&operation)
-  : GPUOperation(std::move(operation)), local_mem_uploads_(operation.local_mem_uploads_)
-{
-}
-
-DepthwiseConv3x3 &DepthwiseConv3x3::operator=(DepthwiseConv3x3 &&operation)
-{
-  if (this != &operation)
-  {
-    std::swap(local_mem_uploads_, operation.local_mem_uploads_);
-    GPUOperation::operator=(std::move(operation));
-  }
-  return *this;
-}
-
-std::string DepthwiseConv3x3::GenerateDepthwiseConvCode(const OperationDef &op_def,
-                                                        bool weights_are_buffer,
-                                                        bool local_mem_uploads)
-{
-  auto src_desc = op_def.src_tensors[0];
-  src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-  AddSrcTensor("src_tensor", src_desc);
-  AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
-
-  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
-
-  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
-                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
-  std::string c = GetCommonDefines(op_def.precision);
-  if (local_mem_uploads)
-  {
-    c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n";
-  }
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int linear_id = get_global_id(0);\n";
-    c += "  int X = (linear_id / args.dst_tensor.Batch()) * 2;\n";
-    c += "  int B = linear_id % args.dst_tensor.Batch();\n";
-    c += "  args.dst_tensor.SetBatchRef(B);\n";
-    c += "  args.src_tensor.SetBatchRef(B);\n";
-  }
-  else
-  {
-    c += "  int X = get_global_id(0) * 2;\n";
-  }
-  c += "  int Y = get_global_id(1) * 2;\n";
-  c += "  int S = get_global_id(2);\n";
-  c += "   ACCUM_FLT4 r0 = (ACCUM_FLT4)(0.0f);\n";
-  c += "   ACCUM_FLT4 r1 = (ACCUM_FLT4)(0.0f);\n";
-  c += "   ACCUM_FLT4 r2 = (ACCUM_FLT4)(0.0f);\n";
-  c += "   ACCUM_FLT4 r3 = (ACCUM_FLT4)(0.0f);\n";
-  if (!local_mem_uploads)
-  {
-    c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() "
-         "|| S >= args.dst_tensor.Slices()) { \n";
-    c += "    return; \n";
-    c += "  } \n";
-  }
-  if (local_mem_uploads)
-  {
-    c += "  __local FLT4 f[10];\n";
-    c += "  event_t e = async_work_group_copy(f, args.weights.GetPtr() + S * "
-         "10, 10, 0);\n";
-    c += "  wait_group_events(1, &e);\n";
-  }
-  else if (weights_are_buffer)
-  {
-    c += "  __global FLT4* f = args.weights.GetPtr() + S * 10;\n";
-  }
-  c += "  FLT4 s0;\n";
-  c += "  FLT4 s1;\n";
-  c += "  FLT4 s2;\n";
-  c += "  FLT4 s3;\n";
-  std::string W[9] = {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"};
-  std::string bias = "bias";
-  std::string xc[4] = {"X - 1", "X", "X + 1", "X + 2"};
-  std::string yc[4] = {"Y - 1", "Y", "Y + 1", "Y + 2"};
-  if (!weights_are_buffer)
-  {
-    c += "   FLT4 f0 = args.weights.Read(0, S);\n";
-    c += "   FLT4 f1 = args.weights.Read(1, S);\n";
-    c += "   FLT4 f2 = args.weights.Read(2, S);\n";
-    c += "   FLT4 f3 = args.weights.Read(3, S);\n";
-    c += "   FLT4 f4 = args.weights.Read(4, S);\n";
-    c += "   FLT4 f5 = args.weights.Read(5, S);\n";
-    c += "   FLT4 f6 = args.weights.Read(6, S);\n";
-    c += "   FLT4 f7 = args.weights.Read(7, S);\n";
-    c += "   FLT4 f8 = args.weights.Read(8, S);\n";
-  }
-  if (manual_clamp)
-  {
-    c += "  int x0 = X - 1;\n";
-    c += "  int x1 = X;\n";
-    c += "  int x2 = X + 1;\n";
-    c += "  int x3 = X + 2;\n";
-    c += "  int y0 = Y - 1;\n";
-    c += "  int y1 = Y;\n";
-    c += "  int y2 = Y + 1;\n";
-    c += "  int y3 = Y + 2;\n";
-    c += "  bool x0_in = x0 >= 0 && x0 < args.dst_tensor.Width();\n";
-    c += "  bool x1_in = x1 >= 0 && x1 < args.dst_tensor.Width();\n";
-    c += "  bool x2_in = x2 >= 0 && x2 < args.dst_tensor.Width();\n";
-    c += "  bool x3_in = x3 >= 0 && x3 < args.dst_tensor.Width();\n";
-    c += "  bool y0_in = y0 >= 0 && y0 < args.dst_tensor.Height();\n";
-    c += "  bool y1_in = y1 >= 0 && y1 < args.dst_tensor.Height();\n";
-    c += "  bool y2_in = y2 >= 0 && y2 < args.dst_tensor.Height();\n";
-    c += "  bool y3_in = y3 >= 0 && y3 < args.dst_tensor.Height();\n";
-    c += "  x0 = clamp(x0, 0, args.dst_tensor.Width() - 1);\n";
-    c += "  x1 = clamp(x1, 0, args.dst_tensor.Width() - 1);\n";
-    c += "  x2 = clamp(x2, 0, args.dst_tensor.Width() - 1);\n";
-    c += "  x3 = clamp(x3, 0, args.dst_tensor.Width() - 1);\n";
-    c += "  y0 = clamp(y0, 0, args.dst_tensor.Height() - 1);\n";
-    c += "  y1 = clamp(y1, 0, args.dst_tensor.Height() - 1);\n";
-    c += "  y2 = clamp(y2, 0, args.dst_tensor.Height() - 1);\n";
-    c += "  y3 = clamp(y3, 0, args.dst_tensor.Height() - 1);\n";
-    if (src_tensor_type == TensorStorageType::BUFFER)
-    {
-      c += "  __global FLT4* src_loc = "
-           "args.src_tensor.GetPtrWithSliceOffset(S);\n";
-    }
-    xc[0] = "x0";
-    xc[1] = "x1";
-    xc[2] = "x2";
-    xc[3] = "x3";
-    yc[0] = "y0";
-    yc[1] = "y1";
-    yc[2] = "y2";
-    yc[3] = "y3";
-  }
-  if (local_mem_uploads || weights_are_buffer)
-  {
-    W[0] = "f[0]";
-    W[1] = "f[1]";
-    W[2] = "f[2]";
-    W[3] = "f[3]";
-    W[4] = "f[4]";
-    W[5] = "f[5]";
-    W[6] = "f[6]";
-    W[7] = "f[7]";
-    W[8] = "f[8]";
-    bias = "f[9]";
-  }
-  auto read_4x_line = [&](int y) {
-    if (src_tensor_type == TensorStorageType::BUFFER)
-    {
-      const std::string y_in = "y" + std::to_string(y) + "_in";
-      c += "    s0 = src_loc[args.src_tensor.GetWHOffset(" + xc[0] + ", " + yc[y] +
-           ")] * (FLT)(x0_in && " + y_in + ");\n";
-      c += "    s1 = src_loc[args.src_tensor.GetWHOffset(" + xc[1] + ", " + yc[y] +
-           ")] * (FLT)(x1_in && " + y_in + ");\n";
-      c += "    s2 = src_loc[args.src_tensor.GetWHOffset(" + xc[2] + ", " + yc[y] +
-           ")] * (FLT)(x2_in && " + y_in + ");\n";
-      c += "    s3 = src_loc[args.src_tensor.GetWHOffset(" + xc[3] + ", " + yc[y] +
-           ")] * (FLT)(x3_in && " + y_in + ");\n";
-    }
-    else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER)
-    {
-      const std::string y_in = "y" + std::to_string(y) + "_in";
-      c += "    s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S) * (FLT)(x0_in && " +
-           y_in + ");\n";
-      c += "    s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S) * (FLT)(x1_in && " +
-           y_in + ");\n";
-      c += "    s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S) * (FLT)(x2_in && " +
-           y_in + ");\n";
-      c += "    s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S) * (FLT)(x3_in && " +
-           y_in + ");\n";
-    }
-    else
-    {
-      c += "    s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S);\n";
-      c += "    s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S);\n";
-      c += "    s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S);\n";
-      c += "    s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S);\n";
-    }
-  };
-  c += "  {\n";
-  read_4x_line(0);
-  c += "    r0 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n";
-  c += "    r0 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n";
-  c += "    r0 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n";
-  c += "  }\n";
-  c += "  {\n";
-  read_4x_line(1);
-  c += "    r0 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n";
-  c += "    r0 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n";
-  c += "    r0 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n";
-  c += "  }\n";
-  c += "  {\n";
-  read_4x_line(2);
-  c += "    r0 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n";
-  c += "    r0 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n";
-  c += "    r0 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n";
-  c += "    r1 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n";
-  c += "  }\n";
-  c += "  {\n";
-  read_4x_line(3);
-  c += "    r2 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n";
-  c += "    r2 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n";
-  c += "    r3 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n";
-  c += "  }\n";
-  if (!weights_are_buffer)
-  {
-    c += "   FLT4 bias = args.weights.Read(9, S);\n";
-  }
-  c += "  r0 += TO_ACCUM_TYPE(" + bias + ");\n";
-  c += "  r1 += TO_ACCUM_TYPE(" + bias + ");\n";
-  c += "  r2 += TO_ACCUM_TYPE(" + bias + ");\n";
-  c += "  r3 += TO_ACCUM_TYPE(" + bias + ");\n";
-  if (local_mem_uploads)
-  {
-    c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() "
-         "|| S >= args.dst_tensor.Slices()) { \n";
-    c += "    return; \n";
-    c += "  } \n";
-  }
-  c += "  if(X + 0 < args.dst_tensor.Width() && Y + 0 < "
-       "args.dst_tensor.Height()) {\n";
-  c += "    FLT4 result = TO_FLT4(r0);\n";
-  c += "    args.dst_tensor.Write(result, X + 0, Y + 0, S)\n";
-  c += "  }\n";
-  c += "  if(X + 1 < args.dst_tensor.Width() && Y + 0 < "
-       "args.dst_tensor.Height()) {\n";
-  c += "    FLT4 result = TO_FLT4(r1);\n";
-  c += "    args.dst_tensor.Write(result, X + 1, Y + 0, S)\n";
-  c += "  }\n";
-  c += "  if(X + 0 < args.dst_tensor.Width() && Y + 1 < "
-       "args.dst_tensor.Height()) {\n";
-  c += "    FLT4 result = TO_FLT4(r2);\n";
-  c += "    args.dst_tensor.Write(result, X + 0, Y + 1, S)\n";
-  c += "  }\n";
-  c += "  if(X + 1 < args.dst_tensor.Width() && Y + 1 < "
-       "args.dst_tensor.Height()) {\n";
-  c += "    FLT4 result = TO_FLT4(r3);\n";
-  c += "    args.dst_tensor.Write(result, X + 1, Y + 1, S)\n";
-  c += "  }\n";
-  c += "}\n";
-
-  return c;
-}
-
-int3 DepthwiseConv3x3::GetGridSize() const
-{
-  const int grid_x = DivideRoundUp(dst_[0]->Width(), 2) * dst_[0]->Batch();
-  const int grid_y = DivideRoundUp(dst_[0]->Height(), 2);
-  const int grid_z = dst_[0]->Slices();
-  return int3(grid_x, grid_y, grid_z);
-}
-
-void DepthwiseConv3x3::GetPossibleKernelWorkGroups(TuningType tuning_type,
-                                                   const DeviceInfo &device_info,
-                                                   const KernelInfo &kernel_info,
-                                                   std::vector<int3> *work_groups) const
-{
-  if (local_mem_uploads_)
-  {
-    work_groups->push_back(work_group_size_);
-  }
-  else
-  {
-    GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-  }
-}
-
-bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr)
-{
-  return attr.weights.shape.o == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 &&
-         attr.weights.shape.w == 3 && attr.weights.shape.h == 3 && attr.strides.w == 1 &&
-         attr.strides.h == 1 && attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1 &&
-         attr.padding.appended.w == 1 && attr.padding.appended.h == 1;
-}
-
-DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info,
-                                        const OperationDef &definition,
-                                        const DepthwiseConvolution2DAttributes &attr)
-{
-  bool weights_are_buffer = device_info.IsPowerVR() || device_info.IsMali();
-  bool local_mem_uploads = weights_are_buffer && device_info.IsPowerVR();
-  DepthwiseConv3x3 result(definition, weights_are_buffer, local_mem_uploads, device_info);
-  result.UploadWeightsAndBiases(attr.weights, attr.bias, weights_are_buffer);
-  return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h
deleted file mode 100644 (file)
index 8c57110..0000000
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__
-
-#include <memory>
-#include <vector>
-
-#include "open_cl/Buffer.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Texture2d.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class DepthwiseConv3x3 : public GPUOperation
-{
-public:
-  DepthwiseConv3x3() = default;
-  void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                                   const KernelInfo &kernel_info,
-                                   std::vector<int3> *work_groups) const override;
-  int3 GetGridSize() const override;
-
-  // Move only
-  DepthwiseConv3x3(DepthwiseConv3x3 &&operation);
-  DepthwiseConv3x3 &operator=(DepthwiseConv3x3 &&operation);
-  DepthwiseConv3x3(const DepthwiseConv3x3 &) = delete;
-  DepthwiseConv3x3 &operator=(const DepthwiseConv3x3 &) = delete;
-
-private:
-  explicit DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer,
-                            bool local_mem_uploads, const DeviceInfo &device_info);
-  template <DataType T>
-  void UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights,
-                              const InternalTensor<Linear, T> &biases, bool weights_are_buffer);
-
-  friend DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info,
-                                                 const OperationDef &definition,
-                                                 const DepthwiseConvolution2DAttributes &attr);
-
-  template <DataType S, typename T>
-  void RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights,
-                                     const InternalTensor<Linear, S> &biases, absl::Span<T> dst);
-
-  std::string GenerateDepthwiseConvCode(const OperationDef &op_def, bool weights_are_buffer,
-                                        bool local_mem_uploads);
-
-  bool local_mem_uploads_;
-};
-
-template <DataType T>
-void DepthwiseConv3x3::UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights,
-                                              const InternalTensor<Linear, T> &biases,
-                                              bool weights_are_buffer)
-{
-  const int src_depth = DivideRoundUp(weights.shape.i, 4);
-  int texture_width = 10; // 3x3 kernel + 1 bias
-  int texture_height = src_depth;
-  const int elements_count = texture_width * texture_height;
-  const bool fp32_weights = definition_.precision == CalculationsPrecision::F32;
-  const int float4_size = fp32_weights ? 16 : 8;
-
-  std::vector<uint8_t> data(float4_size * elements_count);
-  if (fp32_weights)
-  {
-    float4 *ptr = reinterpret_cast<float4 *>(data.data());
-    RearrangeWeightsAndBiasesData(weights, biases, absl::MakeSpan(ptr, elements_count));
-  }
-  // TODO
-  // It doesn't support F16 yet. I will try to add it later.
-  //
-  // else {
-  //   half4* ptr = reinterpret_cast<half4*>(data.data());
-  //   RearrangeWeightsAndBiasesData(weights, biases,
-  //                                 absl::MakeSpan(ptr, elements_count));
-  // }
-
-  if (weights_are_buffer)
-  {
-    BufferDescriptor desc;
-    desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.element_size = 4;
-    desc.size = float4_size * elements_count;
-    desc.data = std::move(data);
-    args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc)));
-  }
-  else
-  {
-    Texture2DDescriptor desc;
-    desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
-    desc.size = int2(texture_width, texture_height);
-    desc.data = std::move(data);
-    args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc)));
-  }
-}
-
-template <DataType S, typename T>
-void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights,
-                                                     const InternalTensor<Linear, S> &biases,
-                                                     absl::Span<T> dst)
-{
-  const int src_depth = DivideRoundUp(weights.shape.i, 4);
-
-  int counter = 0;
-  for (int s = 0; s < src_depth; ++s)
-  {
-    for (int y = 0; y < 3; ++y)
-    {
-      for (int x = 0; x < 3; ++x)
-      {
-        T filter_val;
-        for (int i = 0; i < 4; ++i)
-        {
-          const int s_ch = s * 4 + i;
-          if (s_ch < weights.shape.i)
-          {
-            const int f_index = weights.shape.LinearIndex({0, y, x, s_ch});
-            filter_val[i] = weights.data[f_index];
-          }
-          else
-          {
-            filter_val[i] = 0.0f;
-          }
-        }
-        dst[counter++] = filter_val;
-      }
-    }
-
-    T bias_val;
-    for (int i = 0; i < 4; ++i)
-    {
-      const int dst_ch = s * 4 + i;
-      bias_val[i] = dst_ch >= biases.shape.v ? 0.0f : biases.data[dst_ch];
-    }
-    dst[counter++] = bias_val;
-  }
-}
-
-bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr);
-
-DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info,
-                                        const OperationDef &definition,
-                                        const DepthwiseConvolution2DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc
deleted file mode 100644 (file)
index 8839d96..0000000
+++ /dev/null
@@ -1,385 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "GpuOperation.h"
-
-#include "Util.h"
-#include "WorkGroupPicking.h"
-#include "open_cl/AccessType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetElementWiseCode(const OperationDef &op_def, bool check_src_slices)
-{
-  std::string c = GetCommonDefines(op_def.precision);
-
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  c += "  int Y = get_global_id(1);\n";
-  c += "  int Z = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "Z >= args.dst_tensor.Slices()) return; \n";
-  if (check_src_slices)
-  {
-    c += "  FLT4 src = (FLT4)(0.0f);\n";
-    c += "  if (Z < args.src_tensor.Slices()) {\n";
-    c += "    src = args.src_tensor.Read(X, Y, Z);\n";
-    c += "  }\n";
-  }
-  else
-  {
-    c += "  FLT4 src = args.src_tensor.Read(X, Y, Z);\n";
-  }
-  c += "  args.dst_tensor.Write(src, X, Y, Z);\n";
-  c += "} \n";
-  return c;
-}
-
-int3 GetWorkGroupsCount(int grid_dimension, const int3 &grid_size, const int3 &work_group_size,
-                        const int3 &work_group_launch_order)
-{
-  int3 work_groups_count;
-  if (grid_dimension == 1)
-  {
-    work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x);
-    work_groups_count.y = 1;
-    work_groups_count.z = 1;
-  }
-  else if (grid_dimension == 2)
-  {
-    int3 wgs;
-    wgs.x = DivideRoundUp(grid_size.x, work_group_size.x);
-    wgs.y = DivideRoundUp(grid_size.y, work_group_size.y);
-    work_groups_count.x = wgs[work_group_launch_order[0]];
-    work_groups_count.y = wgs[work_group_launch_order[1]];
-    work_groups_count.z = 1;
-  }
-  else
-  { // grid_dimension == 3
-    int3 wgs;
-    wgs.x = DivideRoundUp(grid_size.x, work_group_size.x);
-    wgs.y = DivideRoundUp(grid_size.y, work_group_size.y);
-    wgs.z = DivideRoundUp(grid_size.z, work_group_size.z);
-    work_groups_count.x = wgs[work_group_launch_order[0]];
-    work_groups_count.y = wgs[work_group_launch_order[1]];
-    work_groups_count.z = wgs[work_group_launch_order[2]];
-  }
-  return work_groups_count;
-}
-
-} // namespace
-
-DataType OperationDef::GetDataType() const { return DeduceDataTypeFromPrecision(precision); }
-
-DataType OperationDef::GetPrimaryDataType() const { return src_tensors[0].data_type; }
-TensorStorageType OperationDef::GetPrimaryStorageType() const
-{
-  return src_tensors[0].storage_type;
-}
-
-bool OperationDef::IsBatchSupported() const
-{
-  for (const auto &src : src_tensors)
-  {
-    if (HasAxis(src.layout, Axis::BATCH))
-    {
-      return true;
-    }
-  }
-  for (const auto &dst : dst_tensors)
-  {
-    if (HasAxis(dst.layout, Axis::BATCH))
-    {
-      return true;
-    }
-  }
-  return false;
-}
-
-GPUOperation::GPUOperation(const OperationDef &definition) : definition_(definition) {}
-
-void GPUOperation::SetSrc(Tensor *ptr, int index)
-{
-  if (index >= (int)src_.size())
-  {
-    src_.resize(index + 1, nullptr);
-  }
-  src_[index] = ptr;
-}
-
-void GPUOperation::SetDst(Tensor *ptr, int index)
-{
-  if (index >= (int)dst_.size())
-  {
-    dst_.resize(index + 1, nullptr);
-  }
-  dst_[index] = ptr;
-}
-
-GPUOperation::GPUOperation(GPUOperation &&operation)
-  : args_(std::move(operation.args_)), code_(std::move(operation.code_)),
-    work_group_size_(operation.work_group_size_),
-    compiler_options_(std::move(operation.compiler_options_)),
-    tensor_to_grid_(operation.tensor_to_grid_), elementwise_(operation.elementwise_),
-    linkable_(operation.linkable_), check_src_channels_size_(operation.check_src_channels_size_),
-    definition_(std::move(operation.definition_)), src_(std::move(operation.src_)),
-    dst_(std::move(operation.dst_)), kernel_(std::move(operation.kernel_)),
-    grid_dimension_(operation.grid_dimension_),
-    work_group_launch_order_(operation.work_group_launch_order_), grid_size_(operation.grid_size_),
-    src_tensors_names_(std::move(operation.src_tensors_names_)),
-    dst_tensors_names_(std::move(operation.dst_tensors_names_)),
-    work_groups_count_(operation.work_groups_count_), linkable_count_(operation.linkable_count_),
-    elementwise_code_(std::move(operation.elementwise_code_))
-{
-}
-
-GPUOperation &GPUOperation::operator=(GPUOperation &&operation)
-{
-  if (this != &operation)
-  {
-    args_ = std::move(operation.args_);
-    code_ = std::move(operation.code_);
-    std::swap(work_group_size_, operation.work_group_size_);
-    compiler_options_ = std::move(operation.compiler_options_);
-    tensor_to_grid_ = operation.tensor_to_grid_;
-    elementwise_ = operation.elementwise_;
-    linkable_ = operation.linkable_;
-    check_src_channels_size_ = operation.check_src_channels_size_;
-    definition_ = std::move(operation.definition_);
-    src_ = std::move(operation.src_);
-    dst_ = std::move(operation.dst_);
-    kernel_ = std::move(operation.kernel_);
-    std::swap(grid_dimension_, operation.grid_dimension_);
-    std::swap(work_group_launch_order_, operation.work_group_launch_order_);
-    std::swap(grid_size_, operation.grid_size_);
-    src_tensors_names_ = std::move(operation.src_tensors_names_);
-    dst_tensors_names_ = std::move(operation.dst_tensors_names_);
-    std::swap(work_groups_count_, operation.work_groups_count_);
-    std::swap(linkable_count_, operation.linkable_count_);
-    elementwise_code_ = std::move(operation.elementwise_code_);
-  }
-  return *this;
-}
-
-absl::Status GPUOperation::AddOperation(GPUOperation *operation)
-{
-  linkable_count_ += 1;
-  std::string code = operation->code_;
-  std::string unique_postfix = absl::StrCat("_link", linkable_count_);
-  operation->args_.RenameArgs(unique_postfix, &code);
-  elementwise_code_ += "{\n" + code + "\n}\n";
-  RETURN_IF_ERROR(args_.Merge(std::move(operation->args_), unique_postfix));
-  for (size_t i = 0; i < operation->src_tensors_names_.size(); ++i)
-  {
-    definition_.src_tensors.push_back(operation->definition_.src_tensors[i + 1]);
-    src_tensors_names_.push_back(operation->src_tensors_names_[i] + unique_postfix);
-  }
-  for (size_t i = 0; i < operation->dst_tensors_names_.size(); ++i)
-  {
-    dst_tensors_names_.push_back(operation->dst_tensors_names_[i] + unique_postfix);
-  }
-  return absl::OkStatus();
-}
-
-void GPUOperation::AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc)
-{
-  src_tensors_names_.push_back(tensor_name);
-  auto desc_new = std::make_unique<TensorDescriptor>(desc);
-  args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new));
-}
-
-void GPUOperation::AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc)
-{
-  src_tensors_names_.push_back(buffer_name);
-  auto desc_new = std::make_unique<BufferDescriptor>(desc);
-  args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new));
-}
-
-void GPUOperation::AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc)
-{
-  dst_tensors_names_.push_back(tensor_name);
-  auto desc_new = std::make_unique<TensorDescriptor>(desc);
-  args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new));
-}
-
-absl::Status GPUOperation::UpdateParams()
-{
-  for (size_t i = 0; i < src_tensors_names_.size(); ++i)
-  {
-    RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i]));
-  }
-  for (size_t i = 0; i < dst_tensors_names_.size(); ++i)
-  {
-    RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
-  }
-  RETURN_IF_ERROR(BindArguments(&args_));
-  grid_size_ = GetGridSize();
-  work_groups_count_ =
-    GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
-  return absl::OkStatus();
-}
-
-absl::Status GPUOperation::AssembleCode(const DeviceInfo &device_info, CLContext *context)
-{
-  if (elementwise_)
-  {
-    auto src_desc = absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
-    if (definition_.IsBatchSupported())
-    {
-      src_desc->SetStateVar("BatchedWidth", "true");
-    }
-    src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
-    args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));
-
-    auto dst_desc = absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
-    if (definition_.IsBatchSupported())
-    {
-      dst_desc->SetStateVar("BatchedWidth", "true");
-    }
-    dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
-    args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
-
-    elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
-    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
-    RETURN_IF_ERROR(args_.AllocateObjects(context));
-    RETURN_IF_ERROR(
-      args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-  }
-  else
-  {
-    RETURN_IF_ERROR(args_.AllocateObjects(context));
-    RETURN_IF_ERROR(
-      args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status GPUOperation::Compile(const CreationContext &creation_context)
-{
-  RETURN_IF_ERROR(AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));
-  RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-    code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device,
-    &kernel_));
-  return PostCompileCheck(creation_context.device->info_, kernel_.info_);
-}
-
-absl::Status GPUOperation::CompileDeserialized(const CreationContext &creation_context)
-{
-  return creation_context.cache->GetOrCreateCLKernel(code_, "main_function", compiler_options_,
-                                                     *creation_context.context,
-                                                     *creation_context.device, &kernel_);
-}
-
-void GPUOperation::GetPossibleKernelWorkGroups(TuningType tuning_type,
-                                               const DeviceInfo &device_info,
-                                               const KernelInfo &kernel_info,
-                                               std::vector<int3> *work_groups) const
-{
-  GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-absl::Status GPUOperation::Tune(const TuningParameters &params)
-{
-  std::vector<int3> possible_work_groups;
-  GetPossibleKernelWorkGroups(params.tuning_type, *params.info, kernel_.info_,
-                              &possible_work_groups);
-  if (possible_work_groups.empty())
-  {
-    return absl::NotFoundError("Can not found work_group size to launch kernel");
-  }
-  if (possible_work_groups.size() == 1)
-  {
-    work_group_size_ = possible_work_groups[0];
-    work_groups_count_ =
-      GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
-    return absl::OkStatus();
-  }
-  else
-  {
-    std::vector<int3> work_groups_count(possible_work_groups.size());
-    for (size_t i = 0; i < work_groups_count.size(); ++i)
-    {
-      work_groups_count[i] = GetWorkGroupsCount(grid_dimension_, grid_size_,
-                                                possible_work_groups[i], work_group_launch_order_);
-    }
-    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
-    int best_work_group_index;
-    RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex(
-      kernel_, *params.info, work_groups_count, possible_work_groups, &best_work_group_index));
-    work_group_size_ = possible_work_groups[best_work_group_index];
-    work_groups_count_ =
-      GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
-    return absl::OkStatus();
-  }
-}
-
-int3 GPUOperation::GetGridSize() const
-{
-  if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ)
-  {
-    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
-    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
-    const int grid_z = dst_[0]->Slices();
-    return int3(grid_x, grid_y, grid_z);
-  }
-  if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1)
-  {
-    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
-    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
-    const int grid_z = 1;
-    return int3(grid_x, grid_y, grid_z);
-  }
-  if (tensor_to_grid_ == TensorToGrid::kWBToX_HToY_DToZ)
-  {
-    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
-    const int grid_y = dst_[0]->Height();
-    const int grid_z = dst_[0]->Depth();
-    return int3(grid_x, grid_y, grid_z);
-  }
-  if (tensor_to_grid_ == TensorToGrid::kBToX_YIs1_ZIs1)
-  {
-    const int grid_x = dst_[0]->Batch();
-    const int grid_y = 1;
-    const int grid_z = 1;
-    return int3(grid_x, grid_y, grid_z);
-  }
-  return grid_size_;
-}
-
-void GPUOperation::AddUniquePostfix(const std::string &unique_postfix)
-{
-  for (uint32_t i = 0; i < src_tensors_names_.size(); ++i)
-  {
-    src_tensors_names_[i] += unique_postfix;
-  }
-  for (uint32_t i = 0; i < dst_tensors_names_.size(); ++i)
-  {
-    dst_tensors_names_[i] += unique_postfix;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h
deleted file mode 100644 (file)
index 4f531c6..0000000
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
-
-#include <string>
-#include <vector>
-
-#include "TuningParameters.h"
-
-#include "open_cl/Arguments.h"
-#include "open_cl/Buffer.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/ClProgram.h"
-#include "open_cl/DataType.h"
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ProgramCache.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Types.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// kCustom: default value
-//   GPUOperation::GetGridSize must be overloaded
-// kWBToX_HDToY_SToZ:
-//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
-//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
-//   grid_z = dst_[0]->Slices();
-// kWBToX_HDToY_ZIs1:
-//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
-//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
-//   grid_z = 1;
-// kWBToX_HToY_DToZ:
-//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
-//   grid_y = dst_[0]->Height();
-//   grid_z = dst_[0]->Depth();
-// kBToX_YIs1_ZIs1:
-//   grid_x = dst_[0]->Batch();
-//   grid_y = 1;
-//   grid_z = 1;
-enum class TensorToGrid
-{
-  kCustom,
-  kWBToX_HDToY_SToZ,
-  kWBToX_HDToY_ZIs1,
-  kWBToX_HToY_DToZ,
-  kBToX_YIs1_ZIs1
-};
-
-struct CreationContext
-{
-  const CLDevice *device;
-  CLContext *context;
-  CLCommandQueue *queue;
-  ProgramCache *cache;
-
-  const DeviceInfo &GetDeviceInfo() const { return device->info_; }
-};
-
-struct OperationDef
-{
-  CalculationsPrecision precision;
-  std::vector<TensorDescriptor> src_tensors;
-  std::vector<TensorDescriptor> dst_tensors;
-
-  // returns FLOAT32 for F32 precision and FLOAT16 for F16 precision
-  DataType GetDataType() const;
-  // Primary means the first src tensor, because first tensor usually defines
-  // the structure of kernel, all other resources(biases) types and etc.
-  DataType GetPrimaryDataType() const;
-  TensorStorageType GetPrimaryStorageType() const;
-  bool IsBatchSupported() const;
-};
-
-// GPUOperation represents some implementation of neural network operation on
-// GPU. GPUOperation can contain another GPU operations with flag elementwise_.
-// When GPUOperation contains another GPU ops, this GPUoperation replaces
-// some sequence of operations Op + op0 + op1 + ...
-// Because of this abilities of GPUOperation, usage scenario is next:
-// Create instance of GPUOperation.
-// Create all instances of GPUOperations that we will(probably) attach
-// to GPUOperation. Attach all GPUOperations to GPUOperation. Call
-// GPUOperation.Compile(). Don't call GPUOperations.Compile() if it
-// attached, it useless(and may be error)
-class GPUOperation
-{
-public:
-  GPUOperation() = default;
-  explicit GPUOperation(const OperationDef &definition);
-  virtual ~GPUOperation() = default;
-  // Move only
-  GPUOperation(GPUOperation &&operation);
-  GPUOperation &operator=(GPUOperation &&operation);
-  GPUOperation(const GPUOperation &) = delete;
-  GPUOperation &operator=(const GPUOperation &) = delete;
-
-  absl::Status AddOperation(GPUOperation *operation);
-
-  void SetSrc(Tensor *ptr, int index = 0);
-  void SetDst(Tensor *ptr, int index = 0);
-
-  // should be called after changes of inputs/outputs.
-  absl::Status UpdateParams();
-
-  absl::Status AddToQueue(CLCommandQueue *queue)
-  {
-    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
-    return queue->Dispatch(kernel_, work_groups_count_, work_group_size_);
-  }
-
-  virtual void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                                           const KernelInfo &kernel_info,
-                                           std::vector<int3> *work_groups) const;
-
-  absl::Status Tune(const TuningParameters &params);
-
-  absl::Status AssembleCode(const DeviceInfo &device_info, CLContext *context);
-
-  absl::Status Compile(const CreationContext &creation_context);
-
-  absl::Status CompileDeserialized(const CreationContext &creation_context);
-
-  virtual absl::Status PostCompileCheck(const DeviceInfo &, const KernelInfo &)
-  {
-    return absl::OkStatus();
-  }
-
-  const OperationDef &GetDefinition() const { return definition_; }
-
-  void AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc);
-  void AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc);
-  void AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc);
-
-  bool IsLinkable() const { return elementwise_ && linkable_; }
-
-  // for linking
-  void AddUniquePostfix(const std::string &unique_postfix);
-
-  Arguments args_;
-  std::string code_;
-  int3 work_group_size_ = int3(8, 4, 1);
-  std::vector<CompilerOptions> compiler_options_;
-  // not applicable to elementwise
-  TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
-
-  bool elementwise_ = false;
-  // applicable only with elementwise_ = true;
-  bool linkable_ = true; // by default every elementwise is linkable
-  // applicable only with elementwise_ = true;
-  bool check_src_channels_size_ = false;
-
-protected:
-  virtual absl::Status BindArguments(ArgumentsBinder *) { return absl::OkStatus(); }
-  virtual int3 GetGridSize() const;
-
-  // Defines operation calculation precision and format of src/dst tensors.
-  OperationDef definition_;
-  std::vector<Tensor *> src_;
-  std::vector<Tensor *> dst_;
-  CLKernel kernel_;
-  int grid_dimension_ = 3; // can be 1, 2 or 3
-  int3 work_group_launch_order_ = int3(0, 1, 2);
-  int3 grid_size_ = int3(0, 0, 0);
-  std::vector<std::string> src_tensors_names_;
-  std::vector<std::string> dst_tensors_names_;
-
-private:
-  int3 work_groups_count_ = int3(0, 0, 0);
-  int linkable_count_ = 0;
-  std::string elementwise_code_; // temporary, used during op construction
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc
deleted file mode 100644 (file)
index ceeab2f..0000000
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Pooling.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetAveragePoolingKernelCode(const OperationDef &op_def, bool stride_correction,
-                                        GPUOperation *op)
-{
-  auto src_desc = op_def.src_tensors[0];
-
-  src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddSrcTensor("src_tensor", src_desc);
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddDstTensor("dst_tensor", dst_desc);
-
-  std::map<Axis, std::string> axis_to_src_coord = {
-    {Axis::WIDTH, "x_c"},  {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"},
-    {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
-  };
-
-  std::map<Axis, std::string> axis_to_dst_coord = {
-    {Axis::WIDTH, "X"},    {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"},
-    {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
-  };
-
-  std::vector<std::string> src_coords;
-  std::vector<std::string> dst_coords;
-  for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS})
-  {
-    if (op_def.dst_tensors[0].HasAxis(axis))
-    {
-      dst_coords.push_back(axis_to_dst_coord[axis]);
-    }
-    if (op_def.src_tensors[0].HasAxis(axis))
-    {
-      src_coords.push_back(axis_to_src_coord[axis]);
-    }
-  }
-  std::string src_coord = src_coords[0];
-  for (size_t i = 1; i < src_coords.size(); ++i)
-  {
-    src_coord += ", " + src_coords[i];
-  }
-  std::string dst_coord = dst_coords[0];
-  for (size_t i = 1; i < dst_coords.size(); ++i)
-  {
-    dst_coord += ", " + dst_coords[i];
-  }
-
-  const bool manual_clamp = op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER ||
-                            op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER;
-
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  int linear_id_1 = get_global_id(1);\n";
-    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
-    c += "  int D = linear_id_1 % args.dst_tensor.Depth();\n";
-  }
-  else
-  {
-    c += "  int Y = get_global_id(1);\n";
-  }
-  c += "  int Z = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "Z >= args.dst_tensor.Slices()) { \n";
-  c += "    return; \n";
-  c += "  } \n";
-  c += "  float4 r = (float4)(0.0f);\n";
-  c += "  float window_size = 0.0;\n";
-  if (stride_correction)
-  {
-    c += "  int xs = " +
-         GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
-         ";\n";
-  }
-  else
-  {
-    if (op_def.IsBatchSupported())
-    {
-      c += "  int xs = X * args.stride_x + args.padding_x * "
-           "args.src_tensor.Batch();\n";
-    }
-    else
-    {
-      c += "  int xs = X * args.stride_x + args.padding_x;\n";
-    }
-  }
-  c += "  int ys = Y * args.stride_y + args.padding_y;\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  int ds = D * args.stride_z + args.padding_z;\n";
-    c += "  for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n";
-    c += "    int d_c = ds + kz;\n";
-    c += "    if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n";
-  }
-  c += "  for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
-  c += "    int y_c = ys + ky;\n";
-  c += "    bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n";
-  c += "    for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
-  if (op_def.IsBatchSupported())
-  {
-    c += "      int x_c = xs + kx * args.src_tensor.Batch();\n";
-  }
-  else
-  {
-    c += "      int x_c = xs + kx;\n";
-  }
-  c += "      bool outside = outside_y || x_c < 0 || x_c >= "
-       "args.src_tensor.Width();\n";
-  if (manual_clamp)
-  {
-    c += "     r += !outside ? args.src_tensor.Read<float>(" + src_coord +
-         ") : "
-         "(float4)(0.0f);\n";
-  }
-  else
-  {
-    c += "      r += args.src_tensor.Read<float>(" + src_coord + ");\n";
-  }
-  c += "        window_size += !outside ? 1.0 : 0.0;\n";
-  c += "    }\n";
-  c += "  }\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  }  // Depth\n";
-  }
-  // If window_size==0, window covered nothing. This situation is a sign of
-  // incorrectly constructed operation. NaNs are expected as output.
-  c += "  FLT4 result = TO_FLT4(r / window_size);\n";
-  c += "  args.dst_tensor.Write(result, " + dst_coord + ");\n";
-  c += "}\n";
-
-  return c;
-}
-
-std::string GetMaxPoolingKernelCode(const OperationDef &op_def, bool stride_correction,
-                                    bool output_indices, GPUOperation *op)
-{
-  auto src_desc = op_def.src_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddSrcTensor("src_tensor", src_desc);
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddDstTensor("dst_tensor", dst_desc);
-  if (output_indices)
-  {
-    auto dst_ind_desc = op_def.dst_tensors[1];
-    if (op_def.IsBatchSupported())
-    {
-      dst_ind_desc.SetStateVar("BatchedWidth", "true");
-    }
-    op->AddDstTensor("dst_indices", dst_ind_desc);
-  }
-
-  std::map<Axis, std::string> axis_to_src_coord = {
-    {Axis::WIDTH, "x_c"},  {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"},
-    {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
-  };
-
-  std::map<Axis, std::string> axis_to_dst_coord = {
-    {Axis::WIDTH, "X"},    {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"},
-    {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"},
-  };
-
-  std::vector<std::string> src_coords;
-  std::vector<std::string> dst_coords;
-  for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS})
-  {
-    if (op_def.dst_tensors[0].HasAxis(axis))
-    {
-      dst_coords.push_back(axis_to_dst_coord[axis]);
-    }
-    if (op_def.src_tensors[0].HasAxis(axis))
-    {
-      src_coords.push_back(axis_to_src_coord[axis]);
-    }
-  }
-  std::string src_coord = src_coords[0];
-  for (size_t i = 1; i < src_coords.size(); ++i)
-  {
-    src_coord += ", " + src_coords[i];
-  }
-  std::string dst_coord = dst_coords[0];
-  for (size_t i = 1; i < dst_coords.size(); ++i)
-  {
-    dst_coord += ", " + dst_coords[i];
-  }
-
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  int linear_id_1 = get_global_id(1);\n";
-    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
-    c += "  int D = linear_id_1 % args.dst_tensor.Depth();\n";
-  }
-  else
-  {
-    c += "  int Y = get_global_id(1);\n";
-  }
-  c += "  int Z = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "Z >= args.dst_tensor.Slices()) { \n";
-  c += "    return; \n";
-  c += "  } \n";
-  c += "  FLT4 maximum = (FLT4)(-10000.0f);\n";
-  if (output_indices)
-  {
-    c += "  FLT4 indexes = (FLT4)(0.0f);\n";
-  }
-  if (stride_correction)
-  {
-    c += "  int xs = " +
-         GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
-         ";\n";
-  }
-  else
-  {
-    if (op_def.IsBatchSupported())
-    {
-      c += "  int xs = X * args.stride_x + args.padding_x * "
-           "args.src_tensor.Batch();\n";
-    }
-    else
-    {
-      c += "  int xs = X * args.stride_x + args.padding_x;\n";
-    }
-  }
-  c += "  int ys = Y * args.stride_y + args.padding_y;\n";
-  c += "  for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n";
-  c += "    int y_c = ys + ky;\n";
-  c += "    if (y_c < 0 || y_c >= args.src_tensor.Height()) continue;\n";
-  c += "    for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n";
-  if (op_def.IsBatchSupported())
-  {
-    c += "      int x_c = xs + kx * args.src_tensor.Batch();\n";
-  }
-  else
-  {
-    c += "      int x_c = xs + kx;\n";
-  }
-  c += "      if (x_c < 0 || x_c >= args.src_tensor.Width()) continue;\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "    int ds = D * args.stride_z + args.padding_z;\n";
-    c += "    for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n";
-    c += "    int d_c = ds + kz;\n";
-    c += "      if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n";
-  }
-  c += "      FLT4 src = args.src_tensor.Read(" + src_coord + ");\n";
-  if (output_indices)
-  {
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      c += "      FLT index_counter = (FLT)((ky * args.kernel_size_x + kx) * "
-           "args.kernel_size_z + kz) + (FLT)(0.1f);\n";
-    }
-    else
-    {
-      c += "      FLT index_counter = (FLT)(ky * args.kernel_size_x + kx) + "
-           "(FLT)(0.1f);\n";
-    }
-    c += "      if (src.x > maximum.x) {\n";
-    c += "        indexes.x = index_counter;\n";
-    c += "        maximum.x = src.x;\n";
-    c += "      }\n";
-    c += "      if (src.y > maximum.y) {\n";
-    c += "        indexes.y = index_counter;\n";
-    c += "        maximum.y = src.y;\n";
-    c += "      }\n";
-    c += "      if (src.z > maximum.z) {\n";
-    c += "        indexes.z = index_counter;\n";
-    c += "        maximum.z = src.z;\n";
-    c += "      }\n";
-    c += "      if (src.w > maximum.w) {\n";
-    c += "        indexes.w = index_counter;\n";
-    c += "        maximum.w = src.w;\n";
-    c += "      }\n";
-  }
-  else
-  {
-    c += "      maximum = max(src, maximum);\n";
-  }
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "    }  // Depth\n";
-  }
-  c += "    }\n";
-  c += "  }\n";
-  c += "  args.dst_tensor.Write(maximum, " + dst_coord + ");\n";
-  if (output_indices)
-  {
-    c += "  args.dst_indices.Write(indexes, " + dst_coord + ");\n";
-  }
-  c += "}\n";
-
-  return c;
-}
-} // namespace
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr)
-{
-  GPUOperation op(definition);
-  op.args_.AddInt("kernel_size_x", attr.kernel.w);
-  op.args_.AddInt("padding_x", -attr.padding.prepended.w);
-  op.args_.AddInt("stride_x", attr.strides.w);
-  op.args_.AddInt("kernel_size_y", attr.kernel.h);
-  op.args_.AddInt("padding_y", -attr.padding.prepended.h);
-  op.args_.AddInt("stride_y", attr.strides.h);
-
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  if (attr.type == PoolingType::AVERAGE)
-  {
-    op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op);
-  }
-  else if (attr.type == PoolingType::MAX)
-  {
-    op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op);
-  }
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-  return op;
-}
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr)
-{
-  GPUOperation op(definition);
-  op.args_.AddInt("kernel_size_x", attr.kernel.w);
-  op.args_.AddInt("padding_x", -attr.padding.prepended.w);
-  op.args_.AddInt("stride_x", attr.strides.w);
-  op.args_.AddInt("kernel_size_y", attr.kernel.h);
-  op.args_.AddInt("padding_y", -attr.padding.prepended.h);
-  op.args_.AddInt("stride_y", attr.strides.h);
-  op.args_.AddInt("kernel_size_z", attr.kernel.d);
-  op.args_.AddInt("padding_z", -attr.padding.prepended.d);
-  op.args_.AddInt("stride_z", attr.strides.d);
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  if (attr.type == PoolingType::AVERAGE)
-  {
-    op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op);
-  }
-  else if (attr.type == PoolingType::MAX)
-  {
-    op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op);
-  }
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h
deleted file mode 100644 (file)
index 166d815..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr);
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc
deleted file mode 100644 (file)
index 37f87e5..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Relu.h"
-
-#include <string>
-#include "Util.h"
-#include "GpuOperation.h"
-#include "absl/strings/str_cat.h"
-#include "open_cl/Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr)
-{
-  GPUOperation op(definition);
-  op.elementwise_ = true;
-
-  std::string min_func;
-  if (attr.alpha != 0.0f)
-  {
-    min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))";
-    if (definition.precision == CalculationsPrecision::F32)
-    {
-      op.args_.AddFloat("alpha", attr.alpha);
-    }
-    else
-    {
-#ifdef FIXME_PORTING_HALF_REQIRED
-      op.args_.AddHalf("alpha", half(attr.alpha));
-#endif
-    }
-  }
-  else
-  {
-    min_func = "(FLT)(0.0f)";
-  }
-  if (attr.clip != 0.0f)
-  {
-    if (definition.precision == CalculationsPrecision::F32)
-    {
-      op.args_.AddFloat("clip", attr.clip);
-    }
-    else
-    {
-#ifdef FIXME_PORTING_HALF_REQIRED
-      op.args_.AddHalf("clip", half(attr.clip));
-#endif
-    }
-    op.code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func + ", args.clip);");
-  }
-  else
-  {
-    op.code_ = absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");");
-  }
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h
deleted file mode 100644 (file)
index eb6b1ad..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__
-
-#include "open_cl/ClKernel.h"
-#include "GpuOperation.h"
-#include "open_cl/Precision.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/Operations.h"
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc
deleted file mode 100644 (file)
index cdd3e83..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reshape.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-std::string GetReshapeCode(const OperationDef &op_def)
-{
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int linear_id = get_global_id(0);\n";
-    c += "  int X = linear_id / args.dst_tensor.Batch();\n";
-    c += "  int B = linear_id % args.dst_tensor.Batch();\n";
-    c += "  args.dst_tensor.SetBatchRef(B);\n";
-  }
-  else
-  {
-    c += "  int X = get_global_id(0);\n";
-  }
-  c += "  int Y = get_global_id(1);\n";
-  c += "  int Z = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "Z >= args.dst_tensor.Slices()) { \n";
-  c += "    return; \n";
-  c += "  } \n";
-  c += "  FLT temps[4];\n";
-  c += "  temps[0] = (FLT)(0.0f);\n";
-  c += "  temps[1] = (FLT)(0.0f);\n";
-  c += "  temps[2] = (FLT)(0.0f);\n";
-  c += "  temps[3] = (FLT)(0.0f);\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int base = B;\n";
-  }
-  else
-  {
-    c += "  int base = 0;\n";
-  }
-  c += "  base = ((base * args.dst_tensor.Height() + Y) * "
-       "args.dst_tensor.Width() + X) * args.dst_tensor.Channels() + Z * 4;\n";
-  c += "  for (int i = 0; i < 4; ++i) {\n";
-  c += "    int dst_channel = Z * 4 + i;\n";
-  c += "    if (dst_channel < args.dst_tensor.Channels()) {;\n";
-  c += "      int p = base + i;\n";
-  c += "      int src_c = p % args.src_tensor.Channels();\n";
-  c += "      p = p / args.src_tensor.Channels();\n";
-  c += "      int src_x = p % args.src_tensor.Width();\n";
-  c += "      p = p / args.src_tensor.Width();\n";
-  c += "      int src_y = p % args.src_tensor.Height();\n";
-  if (op_def.src_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int src_b = p / args.src_tensor.Height();\n";
-    c += "  args.src_tensor.SetBatchRef(src_b);\n";
-  }
-  c += "      int src_z = src_c / 4;\n";
-  c += "      int src_sub_ch = src_c % 4;\n";
-  c += "      FLT4 t = args.src_tensor.Read(src_x, src_y, src_z);\n";
-  c += "      FLT t_ar[4] = {t.x, t.y, t.z, t.w};\n";
-  c += "      temps[i] = t_ar[src_sub_ch];\n";
-  c += "    }\n";
-  c += "  }\n";
-  c += "  FLT4 result = (FLT4)(temps[0], temps[1], temps[2], temps[3]);\n";
-  c += "  args.dst_tensor.Write(result, X, Y, Z);\n";
-  c += "}\n";
-  return c;
-}
-
-} // namespace
-
-GPUOperation CreateReshape(const OperationDef &definition)
-{
-  GPUOperation op(definition);
-  op.AddSrcTensor("src_tensor", definition.src_tensors[0]);
-  op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
-  op.code_ = GetReshapeCode(definition);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h
deleted file mode 100644 (file)
index 4f7c5ea..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateReshape(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc
deleted file mode 100644 (file)
index 13010e7..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Reshape.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::string GetReshapeCode(const OperationDef &op_def)
-{
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int linear_id = get_global_id(0);\n";
-    c += "  int X = linear_id / args.dst_tensor.Batch();\n";
-    c += "  int B = linear_id % args.dst_tensor.Batch();\n";
-    c += "  args.dst_tensor.SetBatchRef(B);\n";
-  }
-  else
-  {
-    c += "  int X = get_global_id(0);\n";
-  }
-  c += "  int Y = get_global_id(1);\n";
-  c += "  int Z = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "Z >= args.dst_tensor.Slices()) { \n";
-  c += "    return; \n";
-  c += "  } \n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int dst_bhwc4 = B;\n";
-  }
-  else
-  {
-    c += "  int dst_bhwc4 = 0;\n";
-  }
-  c += "  dst_bhwc4 = ((dst_bhwc4 * args.dst_tensor.Height() + Y) * "
-       "args.dst_tensor.Width() + X) * args.dst_tensor.Slices() + Z;\n";
-  c += "  int src_z = dst_bhwc4 % args.src_tensor.Slices();\n";
-  c += "  dst_bhwc4 = dst_bhwc4 / args.src_tensor.Slices();\n";
-  c += "  int src_x = dst_bhwc4 % args.src_tensor.Width();\n";
-  c += "  dst_bhwc4 = dst_bhwc4 / args.src_tensor.Width();\n";
-  c += "  int src_y = dst_bhwc4 % args.src_tensor.Height();\n";
-  if (op_def.src_tensors[0].HasAxis(Axis::BATCH))
-  {
-    c += "  int src_b = dst_bhwc4 / args.src_tensor.Height();\n";
-    c += "  args.src_tensor.SetBatchRef(src_b);\n";
-  }
-  c += "  FLT4 result = args.src_tensor.Read(src_x, src_y, src_z);\n";
-  c += "  args.dst_tensor.Write(result, X, Y, Z);\n";
-  c += "}\n";
-  return c;
-}
-
-} // namespace
-
-GPUOperation CreateReshapex4(const OperationDef &definition)
-{
-  GPUOperation op(definition);
-  op.AddSrcTensor("src_tensor", definition.src_tensors[0]);
-  op.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
-  op.code_ = GetReshapeCode(definition);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h
deleted file mode 100644 (file)
index 8988e8b..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0
-GPUOperation CreateReshapex4(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc
deleted file mode 100644 (file)
index 4ee164d..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Softmax.h"
-
-#include <string>
-
-#include "Util.h"
-#include "WorkGroupPicking.h"
-#include "GpuOperation.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-std::string GetSoftmaxKernelCode(const OperationDef &op_def)
-{
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  c += "  int Y = get_global_id(1);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) "
-       "return; \n";
-  c += "  float sum = 0.0f;\n";
-  c += "  for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n";
-  c += "    float4 t = args.src_tensor.Read<float>(X, Y, d);\n";
-  c += "    sum += exp(t.x);\n";
-  c += "    if (d * 4 + 1 < args.dst_tensor.Channels()) sum += exp(t.y);\n";
-  c += "    if (d * 4 + 2 < args.dst_tensor.Channels()) sum += exp(t.z);\n";
-  c += "    if (d * 4 + 3 < args.dst_tensor.Channels()) sum += exp(t.w);\n";
-  c += "  }\n";
-  c += "  for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n";
-  c += "    float4 t = args.src_tensor.Read<float>(X, Y, d);\n";
-  c += "    t = exp(t) / sum;\n";
-  c += "    FLT4 result = TO_FLT4(t);\n";
-  c += "    args.dst_tensor.Write(result, X, Y, d);\n";
-  c += "  }\n";
-  c += "}\n";
-  return c;
-}
-} // namespace
-
-GPUOperation CreateSoftmax(const OperationDef &definition)
-{
-  GPUOperation op(definition);
-  auto src_desc = definition.src_tensors[0];
-  if (definition.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op.AddSrcTensor("src_tensor", src_desc);
-  auto dst_desc = definition.dst_tensors[0];
-  if (definition.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op.AddDstTensor("dst_tensor", dst_desc);
-  op.code_ = GetSoftmaxKernelCode(definition);
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1;
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h
deleted file mode 100644 (file)
index 594bab0..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__
-
-#include "open_cl/ClKernel.h"
-#include "GpuOperation.h"
-#include "open_cl/Precision.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreateSoftmax(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc
deleted file mode 100644 (file)
index 590952d..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Softmax1x1.h"
-
-#include <string>
-
-#include "Util.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-Softmax1x1::Softmax1x1(const OperationDef &definition) : GPUOperation(definition)
-{
-  work_group_size_ = int3(32, 1, 1);
-  code_ = GetSoftmaxKernelCode(definition_);
-}
-
-Softmax1x1::Softmax1x1(Softmax1x1 &&kernel) : GPUOperation(std::move(kernel)) {}
-
-Softmax1x1 &Softmax1x1::operator=(Softmax1x1 &&kernel)
-{
-  if (this != &kernel)
-  {
-    GPUOperation::operator=(std::move(kernel));
-  }
-  return *this;
-}
-
-std::string Softmax1x1::GetSoftmaxKernelCode(const OperationDef &op_def)
-{
-  AddSrcTensor("src_tensor", op_def.src_tensors[0]);
-  AddDstTensor("dst_tensor", op_def.dst_tensors[0]);
-  args_.AddFloat("mask_x");
-  args_.AddFloat("mask_y");
-  args_.AddFloat("mask_z");
-  args_.AddFloat("mask_w");
-  args_.AddInt("slices_x32");
-
-  std::string c = GetCommonDefines(op_def.precision);
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  if (op_def.IsBatchSupported())
-  {
-    c += "  int batch_id = get_global_id(1);\n";
-    c += "  if (batch_id >= args.dst_tensor.Batch()) return;\n";
-    c += "  args.dst_tensor.SetBatchRef(batch_id);\n";
-    c += "  args.src_tensor.SetBatchRef(batch_id);\n";
-  }
-  c += "  float4 mask = (float4)(args.mask_x, args.mask_y, args.mask_z, "
-       "args.mask_w);\n";
-  c += "  int offset = 0;\n";
-  c += "  float sum = 0.0f;\n";
-  c += "  int s = 0;\n";
-  c += "  int tid = get_local_id(0);\n";
-  c += "  do {\n";
-  c += "    int z = offset + tid;\n";
-  c += "    if (z < args.dst_tensor.Slices()) {\n";
-  c += "      float4 mask_temp = z == args.dst_tensor.Slices() - 1 ? mask : "
-       "(float4)(1.0f);\n";
-  c += "      float4 src = args.src_tensor.Read<float>(0, 0, z);\n";
-  c += "      sum += dot(mask_temp, exp(src));\n";
-  c += "      offset += 32;\n";
-  c += "    }\n";
-  c += "    s++;\n";
-  c += "  } while (s < args.slices_x32);\n";
-  c += "\n";
-  c += "  __local float4 tmp[8];\n";
-  c += "  __local float* tmpx1 = (__local float*)tmp;\n";
-  c += "  tmpx1[tid] = sum;\n";
-  c += "  barrier(CLK_LOCAL_MEM_FENCE);\n";
-  c += "  if (tid == 0) {\n";
-  c += "    sum = dot((float4)(1.0f), tmp[0]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[1]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[2]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[3]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[4]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[5]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[6]);\n";
-  c += "    sum += dot((float4)(1.0f), tmp[7]);\n";
-  c += "    tmpx1[0] = 1.0f / sum;\n";
-  c += "  }\n";
-  c += "  barrier(CLK_LOCAL_MEM_FENCE);\n";
-  c += "  sum = tmpx1[0];\n";
-  c += "\n";
-  c += "  offset = 0;\n";
-  c += "  s = 0;\n";
-  c += "  do {\n";
-  c += "    int z = offset + tid;\n";
-  c += "    if (z < args.dst_tensor.Slices()) {\n";
-  c += "      FLT4 res = TO_FLT4(exp(args.src_tensor.Read<float>(0, 0, "
-       "z))*sum);\n";
-  c += "      args.dst_tensor.Write(res, 0, 0, z);\n";
-  c += "      offset += 32;\n";
-  c += "    }\n";
-  c += "    s++;\n";
-  c += "  } while (s < args.slices_x32);\n";
-  c += "}\n";
-  return c;
-}
-
-absl::Status Softmax1x1::BindArguments(ArgumentsBinder *args)
-{
-  float4 mask = GetMaskForLastPlane(src_[0]->Channels());
-  RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x));
-  RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y));
-  RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z));
-  RETURN_IF_ERROR(args->SetFloat("mask_w", mask.w));
-  RETURN_IF_ERROR(args->SetInt("slices_x32", DivideRoundUp(src_[0]->Slices(), 32)));
-  return absl::OkStatus();
-}
-
-int3 Softmax1x1::GetGridSize() const { return int3(32, dst_[0]->Batch(), 1); }
-
-Softmax1x1 CreateSoftmax1x1(const OperationDef &definition) { return Softmax1x1(definition); }
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h
deleted file mode 100644 (file)
index da375d4..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__
-#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class Softmax1x1 : public GPUOperation
-{
-public:
-  Softmax1x1() = default;
-  explicit Softmax1x1(const OperationDef &definition);
-
-  absl::Status BindArguments(ArgumentsBinder *args) override;
-  int3 GetGridSize() const override;
-
-  // Move only
-  Softmax1x1(Softmax1x1 &&kernel);
-  Softmax1x1 &operator=(Softmax1x1 &&kernel);
-  Softmax1x1(const Softmax1x1 &) = delete;
-  Softmax1x1 &operator=(const Softmax1x1 &) = delete;
-
-  friend Softmax1x1 CreateSoftmax1x1();
-
-private:
-  std::string GetSoftmaxKernelCode(const OperationDef &op_def);
-};
-
-Softmax1x1 CreateSoftmax1x1(const OperationDef &definition);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h
deleted file mode 100644 (file)
index 3d99b4f..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__
-#define __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__
-
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/DeviceInfo.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-enum class TuningType
-{
-  EXHAUSTIVE,
-  FAST
-};
-
-struct TuningParameters
-{
-  ProfilingCommandQueue *queue;
-  const DeviceInfo *info;
-  TuningType tuning_type = TuningType::EXHAUSTIVE;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc
deleted file mode 100644 (file)
index df42c66..0000000
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "Util.h"
-
-#include <cfloat>
-#include <cmath>
-#include <string>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/substitute.h"
-#include "open_cl/Precision.h"
-#include "open_cl/DataType.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string GetCommonDefines(CalculationsPrecision precision)
-{
-  std::string result;
-
-  switch (precision)
-  {
-    case CalculationsPrecision::F32:
-      result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
-      result += "#define ACCUM_FLT4 float4\n";
-      result += "#define FLT float\n";
-      result += "#define FLT2 float2\n";
-      result += "#define FLT3 float3\n";
-      result += "#define FLT4 float4\n";
-      result += "#define TO_FLT4 convert_float4\n";
-      result += "#define TO_ACCUM_TYPE convert_float4\n";
-      result += "#define TO_ACCUM_FLT convert_float\n";
-      break;
-    case CalculationsPrecision::F16:
-      result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
-      result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
-      result += "#define ACCUM_FLT4 half4\n";
-      result += "#define FLT half\n";
-      result += "#define FLT2 half2\n";
-      result += "#define FLT3 half3\n";
-      result += "#define FLT4 half4\n";
-      result += "#define TO_FLT4 convert_half4\n";
-      result += "#define TO_ACCUM_TYPE convert_half4\n";
-      result += "#define TO_ACCUM_FLT convert_half\n";
-      break;
-    case CalculationsPrecision::F32_F16:
-      result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
-      result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
-      result += "#define ACCUM_FLT4 float4\n";
-      result += "#define FLT half\n";
-      result += "#define FLT2 half2\n";
-      result += "#define FLT3 half3\n";
-      result += "#define FLT4 half4\n";
-      result += "#define TO_FLT4 convert_half4\n";
-      result += "#define TO_ACCUM_TYPE convert_float4\n";
-      result += "#define TO_ACCUM_FLT convert_float\n";
-      break;
-  }
-  return result;
-}
-
-std::string GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size,
-                                  const std::string &stride_x, const std::string &padding_x)
-{
-  // int p0 = src_x / batch_size;\n";
-  // int b0 = src_x % batch_size;\n";
-  // return (p0 * stride_x + padding_x) * batch_size + b0;\n";
-  return absl::Substitute("(((($0) / $1) * $2 + $3) * $1 + ($0) % $1)", src_x, batch_size, stride_x,
-                          padding_x);
-}
-
-float4 GetMaskForLastPlane(int channels)
-{
-  float4 mask = float4(0.0f);
-  const int reminder = channels % 4 == 0 ? 4 : channels % 4;
-  for (int i = 0; i < reminder; ++i)
-  {
-    mask[i] = 1.0f;
-  }
-  return mask;
-}
-
-int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size)
-{
-  for (const auto &wg : wgs)
-  {
-    const int wg_size = wg.x * wg.y * wg.z;
-    if (wg_size <= max_wg_size)
-    {
-      return wg;
-    }
-  }
-  return {1, 1, 1};
-}
-
-int GetRecommendedBlockSizeForConv(const DeviceInfo &device_info, CalculationsPrecision precision,
-                                   int task_size)
-{
-  const float task_size_per_cu = task_size / static_cast<float>(device_info.compute_units_count);
-  int block_size = 1;
-  float threshold_1 = FLT_MAX;
-  float threshold_2 = FLT_MAX;
-  float threshold_4 = FLT_MAX;
-  if (!device_info.IsMali())
-  {
-    return 1;
-  }
-  MaliInfo mali_info = device_info.mali_info;
-  switch (precision)
-  {
-    case CalculationsPrecision::F16:
-      if (mali_info.IsBifrostGen1())
-      {
-        threshold_1 = 256.0f;
-        threshold_2 = 256.0f * 4.0f;
-        threshold_4 = 256.0f * 8.0f;
-      }
-      else if (mali_info.IsBifrostGen2())
-      {
-        threshold_1 = 256.0f * 2.0f;
-        threshold_2 = 256.0f * 8.0f;
-        threshold_4 = 256.0f * 16.0f;
-      }
-      else if (mali_info.IsBifrostGen3() || mali_info.IsValhall())
-      {
-        threshold_1 = 256.0f;
-        threshold_2 = 256.0f * 6.0f;
-        threshold_4 = 256.0f * 16.0f;
-      }
-      else if (mali_info.IsMidgard())
-      {
-        threshold_1 = 256.0f * 4.0f;
-        threshold_2 = 256.0f * 16.0f;
-      }
-      break;
-    case CalculationsPrecision::F32_F16:
-      if (mali_info.IsBifrostGen1())
-      {
-        threshold_1 = 256.0f;
-        threshold_2 = 256.0f * 3.0f;
-        threshold_4 = 256.0f * 32.0f;
-      }
-      else if (mali_info.IsBifrostGen2())
-      {
-        threshold_1 = 256.0f * 2.0f;
-        threshold_2 = 256.0f * 8.0f;
-      }
-      else if (mali_info.IsBifrostGen3() || mali_info.IsValhall())
-      {
-        threshold_1 = 256.0f;
-        threshold_2 = 256.0f * 8.0f;
-      }
-      else if (mali_info.IsMidgard())
-      {
-        threshold_1 = 256.0f * 4.0f;
-      }
-      break;
-    case CalculationsPrecision::F32:
-      if (mali_info.IsBifrostGen1())
-      {
-        threshold_1 = 256.0f;
-        threshold_2 = 256.0f * 4.0f;
-      }
-      else if (mali_info.IsBifrostGen2())
-      {
-        threshold_1 = 128.0f;
-        threshold_2 = 256.0f * 4.0f;
-      }
-      else if (mali_info.IsBifrostGen3() || mali_info.IsValhall())
-      {
-        threshold_1 = 256.0f;
-        threshold_2 = 256.0f * 12.0f;
-      }
-      else if (mali_info.IsMidgard())
-      {
-        threshold_1 = 256.0f * 16.0f;
-      }
-      break;
-  }
-  if (task_size_per_cu <= threshold_1)
-  {
-    block_size = 1;
-  }
-  else if (task_size_per_cu <= threshold_2)
-  {
-    block_size = 2;
-  }
-  else if (task_size_per_cu <= threshold_4)
-  {
-    block_size = 4;
-  }
-  else
-  {
-    block_size = 8;
-  }
-  return block_size;
-}
-
-int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size)
-{
-  int3 work_groups_count;
-  work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x);
-  work_groups_count.y = DivideRoundUp(grid_size.y, work_group_size.y);
-  work_groups_count.z = DivideRoundUp(grid_size.z, work_group_size.z);
-  return work_groups_count;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h
deleted file mode 100644 (file)
index 8363862..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
-
-#include <string>
-#include <vector>
-
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Precision.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string GetCommonDefines(CalculationsPrecision precision);
-
-// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts
-// with B after W (for example HWBC4) and WB stored in one axis of GPU
-// resources.
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
-                                const std::string &stride_x, const std::string &padding_x);
-
-// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts
-// with B after W (for example HWBC4) and WB stored in one axis of GPU
-// resources.
-std::string GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size,
-                                  const std::string &stride_x, const std::string &padding_x);
-
-// Returns float4 mask for last plane(batch of 4 channels)
-// assumes that plane size is 4;
-// for example we have 7 channels, in our data structures we align it to 8
-// but 8s-channel will be empty, then last plane (batch of 4 channels) will
-// have this mask (1, 1, 1, 0).
-float4 GetMaskForLastPlane(int channels);
-
-// returns first work group from wgs that has size not bigger than max_wg_size
-// if no suitable groups among wgs, returns {1, 1, 1}
-int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size);
-
-// task_size as amount of FLT4 processed elements.
-int GetRecommendedBlockSizeForConv(const DeviceInfo &device, CalculationsPrecision precision,
-                                   int task_size);
-
-int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size);
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc
deleted file mode 100644 (file)
index 214fec2..0000000
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WorkGroupPicking.h"
-
-#include <algorithm>
-#include <limits>
-#include <set>
-#include <vector>
-
-#include "open_cl/Util.h"
-#include "open_cl/Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-std::vector<int2> Get2DWorkgroupsEqualTo128()
-{
-  return {{128, 1}, {64, 2}, {32, 4}, {16, 8}, {8, 16}, {4, 32}, {2, 64}, {1, 128}};
-}
-
-std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(int multiplier, int3 grid,
-                                                     const KernelInfo &kernel_info,
-                                                     const DeviceInfo &device_info,
-                                                     WorkGroupSizeAlignment z_alignment)
-{
-  std::vector<int3> work_groups;
-  work_groups.reserve(32);
-
-  std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment);
-
-  for (int x = 1; x <= kernel_info.max_work_group_size; x *= 2)
-  {
-    for (int y = 1; y <= kernel_info.max_work_group_size; y *= 2)
-    {
-      int work_group_size_xy = x * y;
-      if (work_group_size_xy % multiplier != 0 ||
-          work_group_size_xy > kernel_info.max_work_group_size)
-      {
-        continue;
-      }
-      for (auto z : possible_z_sizes)
-      {
-        if (work_group_size_xy * z > kernel_info.max_work_group_size)
-        {
-          continue;
-        }
-        if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y &&
-            z <= device_info.max_work_group_size_z)
-        {
-          work_groups.push_back({x, y, z});
-        }
-      }
-    }
-  }
-  return work_groups;
-}
-
-std::vector<int3> GenerateWorkGroupSizesXMultipleOf(int multiplier, int3 grid,
-                                                    const KernelInfo &kernel_info,
-                                                    const DeviceInfo &device_info,
-                                                    WorkGroupSizeAlignment z_alignment)
-{
-  std::vector<int3> work_groups;
-  work_groups.reserve(32);
-
-  std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment);
-  std::vector<int> possible_y_sizes = GetPossibleSizes(grid.y, WorkGroupSizeAlignment::PRECISE);
-
-  for (int x = multiplier; x <= kernel_info.max_work_group_size && x < grid.x + multiplier;
-       x += multiplier)
-  {
-    for (auto y : possible_y_sizes)
-    {
-      for (auto z : possible_z_sizes)
-      {
-        if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y &&
-            z <= device_info.max_work_group_size_z && x * y * z <= kernel_info.max_work_group_size)
-        {
-          work_groups.push_back({x, y, z});
-        }
-      }
-    }
-  }
-  return work_groups;
-}
-
-void GetWorkGroupsAlignedToGrid(const DeviceInfo &device_info, const KernelInfo &kernel_info,
-                                const int3 &grid, std::vector<int3> *work_groups)
-{
-  int3 max_wg_size;
-  max_wg_size.x = device_info.max_work_group_size_x;
-  max_wg_size.y = device_info.max_work_group_size_y;
-  max_wg_size.z = device_info.max_work_group_size_z;
-  GenerateWorkGroupSizesAlignedToGrid(grid, max_wg_size, kernel_info.max_work_group_size,
-                                      work_groups);
-}
-
-int GetPenalty(int grid_size, int group_size)
-{
-  const int reminder = grid_size % group_size;
-  return reminder == 0 ? 0 : group_size - reminder;
-}
-
-int GetPenalty(int2 grid_size, int2 group_size)
-{
-  const int p_x = GetPenalty(grid_size.x, group_size.x);
-  const int p_y = GetPenalty(grid_size.y, group_size.y);
-  return p_x * grid_size.y + p_y * grid_size.x + p_x * p_y;
-}
-
-int GetMaxSizeWithMinPenalty(int size, int max_size)
-{
-  int best_size = 128;
-  int min_penalty = GetPenalty(size, best_size);
-  for (int i = 2; i * 128 <= max_size; ++i)
-  {
-    if (GetPenalty(size, i * 128) == min_penalty)
-    {
-      best_size = i * 128;
-    }
-  }
-  return best_size;
-}
-
-int2 GetMaxSizeWithMinPenalty(int2 size, int max_size)
-{
-  std::vector<int2> base_groups = Get2DWorkgroupsEqualTo128();
-  int min_penalty = std::numeric_limits<int>::max();
-  for (const auto &group : base_groups)
-  {
-    min_penalty = std::min(GetPenalty(size, group), min_penalty);
-  }
-  for (const auto &group : base_groups)
-  {
-    for (int y = 1; y * group.y <= max_size; ++y)
-    {
-      int new_group_y = y * group.y;
-      for (int x = 1; x * group.x <= max_size; ++x)
-      {
-        int new_group_x = x * group.x;
-        if (new_group_x * new_group_y > max_size)
-        {
-          break;
-        }
-        if (GetPenalty(size, int2(new_group_x, new_group_y)) == min_penalty)
-        {
-          return int2(new_group_x, new_group_y);
-        }
-      }
-    }
-  }
-  return int2(0, 0);
-}
-
-int GetBiggestDividerWithPriority(int number, int max_divider)
-{
-  if (number % 8 == 0 && 8 <= max_divider)
-  {
-    return 8;
-  }
-  if (number % 4 == 0 && 4 <= max_divider)
-  {
-    return 4;
-  }
-  if (number % 2 == 0 && 2 <= max_divider)
-  {
-    return 2;
-  }
-  for (int i = max_divider; i != 0; i--)
-  {
-    if (number % i == 0)
-    {
-      return i;
-    }
-  }
-  return 1;
-}
-
-int GetBiggestDivider(int number, int max_divider)
-{
-  for (int i = max_divider; i != 0; i--)
-  {
-    if (number % i == 0)
-    {
-      return i;
-    }
-  }
-  return 1;
-}
-
-} // namespace
-
-int3 GetWorkGroupXY128ConvLinear(const int3 &grid)
-{
-  int grid_z = GetBiggestDividerWithPriority(grid.z, 4);
-  if (grid.x <= 128)
-  {
-    return int3(128, 1, grid_z);
-  }
-  int grid_x = GetMaxSizeWithMinPenalty(grid.x, 512 / grid_z);
-  return {grid_x, 1, grid_z};
-}
-
-int3 GetWorkGroupXY128Conv(const int3 &grid)
-{
-  int grid_z = GetBiggestDividerWithPriority(grid.z, 4);
-  if (grid.x <= 16 && grid.y <= 8)
-  {
-    return int3(16, 8, grid_z);
-  }
-  int2 grid_xy = GetMaxSizeWithMinPenalty(int2(grid.x, grid.y), 512 / grid_z);
-  return int3(grid_xy.x, grid_xy.y, grid_z);
-}
-
-// int3 GetWorkGroupXY128Simple(const int3& grid) { return int3(16, 8, 1); }
-
-int3 GetWorkGroup(const int3 &grid, int max_size)
-{
-  int wg_z = GetBiggestDividerWithPriority(grid.z, 8);
-  int wg_xy_size = max_size / wg_z;
-  int wg_x = std::min(DivideRoundUp(grid.x, 2), wg_xy_size);
-  int wg_y = std::min(wg_xy_size / wg_x, grid.y);
-  return int3(wg_x, wg_y, wg_z);
-}
-
-int3 GetWorkGroupConv(const int3 &grid, int max_size, int max_z_size)
-{
-  int wg_z = GetBiggestDivider(grid.z, max_z_size);
-  int wg_xy_size = std::min(256, max_size) / wg_z;
-  int wg_x = std::min(grid.x, wg_xy_size);
-  int wg_y = std::min(wg_xy_size / wg_x, grid.y);
-  if (wg_y == grid.y && grid.y % 2 == 0)
-  {
-    wg_y = grid.y / 2;
-  }
-  return int3(wg_x, wg_y, wg_z);
-}
-
-void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info,
-                                       const KernelInfo &kernel_info, const int3 &grid,
-                                       WorkGroupSizeAlignment z_alignment,
-                                       std::vector<int3> *work_groups)
-{
-  *work_groups =
-    GenerateWorkGroupSizesXYMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment);
-}
-
-void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info,
-                                      const KernelInfo &kernel_info, const int3 &grid,
-                                      WorkGroupSizeAlignment z_alignment,
-                                      std::vector<int3> *work_groups)
-{
-  *work_groups =
-    GenerateWorkGroupSizesXMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment);
-}
-
-bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height)
-{
-  int planar_work_groups = DivideRoundUp(width * height, 128);
-  auto base_work_groups = Get2DWorkgroupsEqualTo128();
-  bool have_equal_work_groups = false;
-  for (auto &work_group : base_work_groups)
-  {
-    int x_groups = DivideRoundUp(width, work_group.x);
-    int y_groups = DivideRoundUp(height, work_group.y);
-    int xy_groups = x_groups * y_groups;
-    if (xy_groups == planar_work_groups)
-    {
-      have_equal_work_groups = true;
-      break;
-    }
-  }
-  return !have_equal_work_groups;
-}
-
-void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                           const KernelInfo &kernel_info, const int3 &grid,
-                           std::vector<int3> *work_groups)
-{
-  switch (tuning_type)
-  {
-    case TuningType::FAST:
-      work_groups->push_back(GetWorkGroup(grid, kernel_info.max_work_group_size));
-      return;
-    case TuningType::EXHAUSTIVE:
-    {
-      GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups);
-      return;
-    }
-    default:
-      work_groups->push_back({8, 4, 1});
-      return;
-  }
-}
-
-void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info,
-                               const KernelInfo &kernel_info, const int3 &grid,
-                               std::vector<int3> *work_groups)
-{
-  switch (tuning_type)
-  {
-    case TuningType::FAST:
-    {
-      int max_z_size = 16;
-      if (device_info.IsAdreno())
-      {
-        max_z_size = device_info.IsAdreno3xx() ? 16 : 64;
-      }
-      max_z_size = std::min(max_z_size, device_info.max_work_group_size_z);
-      work_groups->push_back(GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size));
-      return;
-    }
-    case TuningType::EXHAUSTIVE:
-    {
-      GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups);
-      return;
-    }
-    default:
-      work_groups->push_back({8, 4, 1});
-      return;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h
deleted file mode 100644 (file)
index c19890d..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WROK_GROUP_PICKING_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WROK_GROUP_PICKING_H__
-
-#include <vector>
-
-#include "TuningParameters.h"
-
-#include "open_cl/ClKernel.h"
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Types.h"
-#include "open_cl/WorkgroupSelection.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// multiplier can be power of two only
-void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info,
-                                       const KernelInfo &kernel_info, const int3 &grid,
-                                       WorkGroupSizeAlignment z_alignment,
-                                       std::vector<int3> *work_groups);
-
-void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info,
-                                      const KernelInfo &kernel_info, const int3 &grid,
-                                      WorkGroupSizeAlignment z_alignment,
-                                      std::vector<int3> *work_groups);
-
-int3 GetWorkGroupXY128ConvLinear(const int3 &grid);
-
-int3 GetWorkGroupXY128Simple(const int3 &grid);
-int3 GetWorkGroupXY128Conv(const int3 &grid);
-
-bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height);
-
-void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                           const KernelInfo &kernel_info, const int3 &grid,
-                           std::vector<int3> *work_groups);
-
-void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info,
-                               const KernelInfo &kernel_info, const int3 &grid,
-                               std::vector<int3> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WROK_GROUP_PICKING_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc
deleted file mode 100644 (file)
index eac6f32..0000000
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "ConvolutionSelector.h"
-
-#include "absl/memory/memory.h"
-#include "open_cl/kernels/ConvBuffer1x1.h"
-#include "open_cl/kernels/ConvConstants.h"
-#include "open_cl/kernels/ConvPowervr.h"
-#include "open_cl/kernels/ConvWeightsConverter.h"
-#include "open_cl/kernels/WorkGroupPicking.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::unique_ptr<GPUOperation> SelectConvolutionAdreno(const Convolution2DAttributes &attr,
-                                                      const BHWC &dst_shape,
-                                                      const DeviceInfo &device_info,
-                                                      const OperationDef &op_def, ModelHints)
-{
-  if (IsConvConstantsSupported(device_info, op_def, attr))
-  {
-    GPUOperation conv = CreateConvConstants(device_info, op_def, attr);
-    return absl::make_unique<GPUOperation>(std::move(conv));
-  }
-  else
-  {
-    ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionWinogradAdreno(const Convolution2DAttributes &attr,
-                                                              const BHWC &dst_shape,
-                                                              const DeviceInfo &device_info,
-                                                              const OperationDef &op_def,
-                                                              ModelHints)
-{
-  ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape);
-  return absl::make_unique<ConvPowerVR>(std::move(conv));
-}
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionDynamicWeightsAdreno(const Convolution2DAttributes &attr,
-                                      const BHWC &weights_shape, const BHWC &dst_shape,
-                                      const DeviceInfo &device_info, const OperationDef &op_def,
-                                      ModelHints, ConvWeightsDescription *weights_desc)
-{
-  ConvPowerVR conv =
-    CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
-  *weights_desc = conv.GetConvWeightsDescription();
-  return absl::make_unique<ConvPowerVR>(std::move(conv));
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionNVidia(const Convolution2DAttributes &attr,
-                                                      const BHWC &dst_shape,
-                                                      const DeviceInfo &device_info,
-                                                      const OperationDef &op_def)
-{
-  if (IsConvConstantsSupported(device_info, op_def, attr))
-  {
-    GPUOperation conv = CreateConvConstants(device_info, op_def, attr);
-    return absl::make_unique<GPUOperation>(std::move(conv));
-  }
-  else
-  {
-    ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionPowerVR(const Convolution2DAttributes &attr,
-                                                       const DeviceInfo &device_info,
-                                                       const OperationDef &op_def)
-{
-  ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr);
-  return absl::make_unique<ConvPowerVR>(std::move(conv));
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionMali(const Convolution2DAttributes &attr,
-                                                    const BHWC &dst_shape,
-                                                    const DeviceInfo &device_info,
-                                                    const OperationDef &op_def)
-{
-  if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER &&
-      IsConvBuffer1x1Supported(op_def, attr))
-  {
-    ConvBuffer1x1 conv = CreateConvBuffer1x1(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvBuffer1x1>(std::move(conv));
-  }
-  else
-  {
-    ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionWinogradMali(const Convolution2DAttributes &attr,
-                                                            const BHWC &dst_shape,
-                                                            const DeviceInfo &device_info,
-                                                            const OperationDef &op_def)
-{
-  if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER)
-  {
-    ConvBuffer1x1 conv = CreateConvBuffer1x1Wino4x4To6x6(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvBuffer1x1>(std::move(conv));
-  }
-  else
-  {
-    ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-}
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionDynamicWeightsMali(const Convolution2DAttributes &attr, const BHWC &weights_shape,
-                                    const BHWC &dst_shape, const DeviceInfo &device_info,
-                                    const OperationDef &op_def, ModelHints,
-                                    ConvWeightsDescription *weights_desc)
-{
-  if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER &&
-      IsConvBuffer1x1Supported(op_def, weights_shape, attr))
-  {
-    ConvBuffer1x1 conv =
-      CreateConvBuffer1x1DynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
-    *weights_desc = conv.GetConvWeightsDescription();
-    return absl::make_unique<ConvBuffer1x1>(std::move(conv));
-  }
-  else
-  {
-    ConvPowerVR conv =
-      CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
-    *weights_desc = conv.GetConvWeightsDescription();
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-}
-
-} // namespace
-
-std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr,
-                                                const BHWC &dst_shape,
-                                                const DeviceInfo &device_info,
-                                                const OperationDef &op_def, ModelHints hints)
-{
-  if (device_info.IsAdreno())
-  {
-    return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints);
-  }
-  else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsIntel())
-  {
-    return SelectConvolutionPowerVR(attr, device_info, op_def);
-  }
-  else if (device_info.IsNvidia())
-  {
-    return SelectConvolutionNVidia(attr, dst_shape, device_info, op_def);
-  }
-  else if (device_info.IsMali())
-  {
-    return SelectConvolutionMali(attr, dst_shape, device_info, op_def);
-  }
-  else
-  {
-    return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints);
-  }
-}
-
-std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr,
-                                                           const BHWC &dst_shape,
-                                                           const DeviceInfo &device_info,
-                                                           const OperationDef &op_def,
-                                                           ModelHints hints)
-{
-  if (device_info.IsAdreno())
-  {
-    return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints);
-  }
-  else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsNvidia() ||
-           device_info.IsIntel())
-  {
-    ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape);
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-  else if (device_info.IsMali())
-  {
-    return SelectConvolutionWinogradMali(attr, dst_shape, device_info, op_def);
-  }
-  else
-  {
-    return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints);
-  }
-}
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape,
-                                    const BHWC &dst_shape, const DeviceInfo &device_info,
-                                    const OperationDef &op_def, ModelHints hints,
-                                    ConvWeightsDescription *weights_desc)
-{
-  if (device_info.IsAdreno())
-  {
-    return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, device_info,
-                                                 op_def, hints, weights_desc);
-  }
-  else if (device_info.IsMali())
-  {
-    return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, device_info, op_def,
-                                               hints, weights_desc);
-  }
-  else
-  {
-    ConvPowerVR conv =
-      CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape);
-    *weights_desc = conv.GetConvWeightsDescription();
-    return absl::make_unique<ConvPowerVR>(std::move(conv));
-  }
-}
-
-std::unique_ptr<GPUOperation>
-SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def,
-                             ModelHints)
-{
-  ConverterToConvWeights converter = ConverterToConvWeights(op_def, weights_desc);
-  return absl::make_unique<ConverterToConvWeights>(std::move(converter));
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h
deleted file mode 100644 (file)
index d45eea8..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__
-
-#include <memory>
-
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/ModelHints.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr,
-                                                const BHWC &dst_shape,
-                                                const DeviceInfo &device_info,
-                                                const OperationDef &op_def, ModelHints hints);
-
-std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr,
-                                                           const BHWC &dst_shape,
-                                                           const DeviceInfo &device_info,
-                                                           const OperationDef &op_def,
-                                                           ModelHints hints);
-
-std::unique_ptr<GPUOperation>
-SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape,
-                                    const BHWC &dst_shape, const DeviceInfo &device_info,
-                                    const OperationDef &op_def, ModelHints hints,
-                                    ConvWeightsDescription *weights_desc);
-
-std::unique_ptr<GPUOperation>
-SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def,
-                             ModelHints hints);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc
deleted file mode 100644 (file)
index f07eef6..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "DwConvolutionSelector.h"
-
-#include "absl/memory/memory.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/DepthwiseConv.h"
-#include "open_cl/kernels/DepthwiseConv3x3.h"
-#include "open_cl/Precision.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionAdreno(const DepthwiseConvolution2DAttributes &attr,
-                          const DeviceInfo &device_info, const OperationDef &op_def)
-{
-  if (IsDepthwiseConv3x3Supported(attr))
-  {
-    return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr));
-  }
-  else
-  {
-    return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr));
-  }
-}
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionPowerVR(const DepthwiseConvolution2DAttributes &attr,
-                           const DeviceInfo &device_info, const OperationDef &op_def)
-{
-  if (IsDepthwiseConv3x3Supported(attr))
-  {
-    return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr));
-  }
-  else
-  {
-    return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr));
-  }
-}
-
-std::unique_ptr<GPUOperation> SelectDWConvolutionMali(const DepthwiseConvolution2DAttributes &attr,
-                                                      const DeviceInfo &device_info,
-                                                      const OperationDef &op_def)
-{
-  const auto storage_type = op_def.src_tensors[0].storage_type;
-  bool buffer_type =
-    storage_type == TensorStorageType::BUFFER || storage_type == TensorStorageType::IMAGE_BUFFER;
-  const MaliInfo mali_info = device_info.mali_info;
-  if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type &&
-      op_def.precision != CalculationsPrecision::F32)
-  {
-    return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr));
-  }
-  else
-  {
-    return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr));
-  }
-}
-} // namespace
-
-std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr,
-                                                  const DeviceInfo &device_info,
-                                                  const OperationDef &op_def)
-{
-  if (device_info.IsAdreno())
-  {
-    return SelectDWConvolutionAdreno(attr, device_info, op_def);
-  }
-  else if (device_info.IsPowerVR())
-  {
-    return SelectDWConvolutionPowerVR(attr, device_info, op_def);
-  }
-  else if (device_info.IsMali())
-  {
-    return SelectDWConvolutionMali(attr, device_info, op_def);
-  }
-  else
-  {
-    return SelectDWConvolutionAdreno(attr, device_info, op_def);
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h
deleted file mode 100644 (file)
index 2fa40c5..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__
-
-#include <memory>
-
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr,
-                                                  const DeviceInfo &device_info,
-                                                  const OperationDef &op_def);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc
deleted file mode 100644 (file)
index ac514b2..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "SimpleSelectors.h"
-
-#include <memory>
-#include <set>
-
-#include "open_cl/kernels/Add.h"
-#include "open_cl/kernels/DepthwiseConv.h"
-#include "open_cl/kernels/Pooling.h"
-#include "open_cl/kernels/Relu.h"
-#include "open_cl/kernels/Reshape.h"
-#include "open_cl/kernels/Reshapex4.h"
-#include "open_cl/kernels/Softmax.h"
-#include "open_cl/kernels/Softmax1x1.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels,
-               std::unique_ptr<GPUOperation> *ptr)
-{
-  GPUOperation operation = CreateAdd(op_def, channels, dst_channels);
-  *ptr = std::make_unique<GPUOperation>(std::move(operation));
-}
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr,
-                                  const DeviceInfo &device_info, const OperationDef &op_def)
-{
-  return absl::make_unique<GPUOperation>(
-    CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr));
-}
-
-std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr,
-                                            const OperationDef &op_def)
-{
-  GPUOperation operation = CreatePooling(op_def, attr);
-  return absl::make_unique<GPUOperation>(std::move(operation));
-}
-
-std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def)
-{
-  return absl::make_unique<GPUOperation>(CreateReLU(op_def, attr));
-}
-
-void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def,
-                   std::unique_ptr<GPUOperation> *ptr)
-{
-  if (src_channels % 4 == 0 && dst_channels % 4 == 0)
-  {
-    GPUOperation operation = CreateReshapex4(op_def);
-    *ptr = std::make_unique<GPUOperation>(std::move(operation));
-  }
-  else
-  {
-    GPUOperation operation = CreateReshape(op_def);
-    *ptr = std::make_unique<GPUOperation>(std::move(operation));
-  }
-}
-
-void SelectSoftmax(const BHWC &shape, const OperationDef &op_def,
-                   std::unique_ptr<GPUOperation> *ptr)
-{
-  if (shape.w == 1 && shape.h == 1)
-  {
-    Softmax1x1 operation = CreateSoftmax1x1(op_def);
-    *ptr = absl::make_unique<Softmax1x1>(std::move(operation));
-  }
-  else
-  {
-    GPUOperation operation = CreateSoftmax(op_def);
-    *ptr = absl::make_unique<GPUOperation>(std::move(operation));
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h
deleted file mode 100644 (file)
index 2c5837a..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__
-
-#include <memory>
-
-#include "open_cl/ClDevice.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels,
-               std::unique_ptr<GPUOperation> *ptr);
-
-std::unique_ptr<GPUOperation>
-SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr,
-                                  const DeviceInfo &device_info, const OperationDef &op_def);
-
-std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr,
-                                            const OperationDef &op_def);
-
-std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def);
-
-void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def,
-                   std::unique_ptr<GPUOperation> *ptr);
-
-void SelectSoftmax(const BHWC &shape, const OperationDef &op_def,
-                   std::unique_ptr<GPUOperation> *ptr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__
index 6dd9bd252a8bb26fe8bab7676c0dcc8937308931..d3ed102a1785e06bd8478da7712cb3d794007484 100644 (file)
 
 #include "CLTensor.h"
 
-#include "open_cl/Buffer.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/TensorType.h"
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+using namespace tflite::gpu::cl;
 
 namespace onert
 {
@@ -30,16 +32,15 @@ namespace gpu_cl
 namespace operand
 {
 
-CLTensor::CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment)
-  : ICLTensor{rank, shape, environment}, _tensor(std::make_shared<Tensor>())
+CLTensor::CLTensor(size_t rank, ir::Shape shape,
+                   std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type)
+  : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>())
 {
 }
 
-const Tensor *CLTensor::handle() const { return _tensor.get(); }
-
-Tensor *CLTensor::handle() { return _tensor.get(); }
+const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); }
 
-void CLTensor::setBuffer(void *host_ptr) { (void)host_ptr; }
+tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); }
 
 } // namespace operand
 } // namespace gpu_cl
index 7d2e70a9924304a8960881ee30158f8466da2635..f2153f430203915fbab7b2d08739c47c99639d38 100644 (file)
@@ -19,9 +19,9 @@
 
 #include "ICLTensor.h"
 
-#include "open_cl/Buffer.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/Tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 
 namespace onert
 {
@@ -38,11 +38,12 @@ public:
   CLTensor() = delete;
 
 public:
-  CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment);
+  CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
+           TensorType type);
 
 public:
-  const Tensor *handle() const override;
-  Tensor *handle() override;
+  const tflite::gpu::cl::Tensor *handle() const override;
+  tflite::gpu::cl::Tensor *handle() override;
 
 public:
   /** Set given buffer as the buffer of the tensor
@@ -55,7 +56,7 @@ public:
   void setBuffer(void *host_ptr);
 
 private:
-  std::shared_ptr<Tensor> _tensor;
+  std::shared_ptr<tflite::gpu::cl::Tensor> _tensor;
 };
 
 } // namespace operand
index 3f070be0c430475ded4ff93fe7fa59c531bf2a38..a95f780568e97a164b144fba202f7968fa362be3 100644 (file)
 
 #include "ICLTensor.h"
 
-#include "open_cl/Api.h"
-#include "open_cl/Spi.h"
-#include "open_cl/OpenclWrapper.h"
-#include "open_cl/TensorTypeUtil.h"
-#include "open_cl/kernels/Converter.h"
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
 
 namespace onert
 {
@@ -31,6 +31,10 @@ namespace gpu_cl
 namespace operand
 {
 
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+using namespace tflite::gpu::internal_tensor;
+
 void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
 {
   if (total_size() == 0)
@@ -39,100 +43,133 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
   fn(*this);
 }
 
-void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+void ICLTensor::writeConvertInit()
 {
-  const float *arr = (float *)ptr;
-  TensorObject input_obj = MakeReadableCpuMemory(absl::MakeSpan(arr, total_size() / 4));
+  TensorObjectDef input_def;
+  input_def.dimensions.b = handle()->Batch();
+  input_def.dimensions.h = handle()->Height();
+  input_def.dimensions.w = handle()->Width();
+  input_def.dimensions.c = handle()->Channels();
+  input_def.object_def.data_layout = DataLayout::BHWC;
+  input_def.object_def.data_type = DataType::FLOAT32;
+  input_def.object_def.object_type = ObjectType::CPU_MEMORY;
+  input_def.object_def.user_provided = true;
 
-  TensorObject output_obj;
+  TensorObjectDef permute_def = input_def;
+  permute_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
 
-  if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+  auto dims = permute_def.dimensions;
+  const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+  const TensorDescriptor desc{
+    permute_def.object_def.data_type,
+    ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+    Layout::BHWC};
+  if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
   {
-    output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+    throw std::runtime_error("Failed to AllocateTensorMemory");
   }
-  else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+
+  TensorObjectDef output_def = permute_def;
+  output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+  output_def.object_def.data_type = handle()->GetDataType();
+  input_def.object_def.user_provided = false;
+
+  _converter_builder = NewConverterBuilder(_environment.get());
+  if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
   {
-    output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+    throw std::runtime_error("Failed to make converter_to");
   }
-  else
+  if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
   {
-    output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+    throw std::runtime_error("Failed to make converter_from");
   }
+}
+
+void ICLTensor::readConvertInit()
+{
+  _converter_builder = NewConverterBuilder(_environment.get());
 
   TensorObjectDef input_def;
   input_def.dimensions.b = handle()->Batch();
   input_def.dimensions.h = handle()->Height();
   input_def.dimensions.w = handle()->Width();
   input_def.dimensions.c = handle()->Channels();
-  input_def.object_def.data_layout = DataLayout::BHWC;
-  input_def.object_def.data_type = DataType::FLOAT32;
-  input_def.object_def.object_type = ObjectType::CPU_MEMORY;
-  input_def.object_def.user_provided = true;
+  input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+  input_def.object_def.data_type = handle()->GetDataType();
+  input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+  input_def.object_def.user_provided = false;
 
-  TensorObjectDef tmp_def;
-  tmp_def.dimensions.b = handle()->Batch();
-  tmp_def.dimensions.h = handle()->Height();
-  tmp_def.dimensions.w = handle()->Width();
-  tmp_def.dimensions.c = handle()->Channels();
-  tmp_def.object_def.data_layout = DataLayout::BHWC;
-  tmp_def.object_def.data_type = DataType::FLOAT32;
-  tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
-  tmp_def.object_def.user_provided = true;
-
-  auto dims = tmp_def.dimensions;
+  TensorObjectDef permute_def = input_def;
+  permute_def.object_def.data_layout = DataLayout::BHWC;
+  permute_def.object_def.data_type = DataType::FLOAT32;
+  permute_def.object_def.user_provided = true;
+
+  auto dims = permute_def.dimensions;
   const BHWC shape(dims.b, dims.h, dims.w, dims.c);
   const TensorDescriptor desc{
-    tmp_def.object_def.data_type,
-    ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout),
+    permute_def.object_def.data_type,
+    ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
     Layout::BHWC};
   if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
   {
-    throw std::runtime_error("AllocateTensorMemory error.");
+    throw std::runtime_error("Failed to AllocateTensorMemory");
   }
-  TensorObject tmp_obj;
-  if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE)
+
+  TensorObjectDef output_def = permute_def;
+  output_def.object_def.object_type = ObjectType::CPU_MEMORY;
+
+  if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
   {
-    tmp_obj = OpenClTexture{_cl_memory.memory()};
+    throw std::runtime_error("Failed to make converter_from");
   }
-  else
+  if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
   {
-    tmp_obj = OpenClBuffer{_cl_memory.memory()};
+    throw std::runtime_error("Failed to make converter_to");
   }
+}
 
-  TensorObjectDef output_def = input_def;
-  output_def.dimensions.b = handle()->Batch();
-  output_def.dimensions.h = handle()->Height();
-  output_def.dimensions.w = handle()->Width();
-  output_def.dimensions.c = handle()->Channels();
-  output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
-  output_def.object_def.data_type = handle()->GetDataType();
-  output_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool)
+{
+  TensorObject input_obj =
+    MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements()));
 
-  _converter_builder = NewConverterBuilder(_environment.get());
-  if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_cpu).ok())
+  TensorObject output_obj;
+
+  TensorObject permute_obj;
+  if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
   {
-    throw std::runtime_error("MakeConverter<_converter_cpu> error.");
+    permute_obj = OpenClTexture{_cl_memory.memory()};
   }
-  if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_bhwc).ok())
+  else
   {
-    throw std::runtime_error("MakeConverter<_converter_bhwc> error.");
+    permute_obj = OpenClBuffer{_cl_memory.memory()};
   }
 
-  if (!_converter_cpu->Convert(input_obj, tmp_obj).ok())
+  if (handle()->GetStorageType() == TensorStorageType::BUFFER)
   {
-    throw std::runtime_error("[w] _converter_cpu Convert error.");
+    output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
   }
-  if (!_converter_bhwc->Convert(tmp_obj, output_obj).ok())
+  else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
   {
-    throw std::runtime_error("[w] _converter_bhwc Convert error.");
+    output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+  }
+  else
+  {
+    output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+  }
+
+  if (!_converter_to->Convert(input_obj, permute_obj).ok())
+  {
+    throw std::runtime_error("Failed to write cl buffer from cpu memory");
+  }
+  if (!_converter_from->Convert(permute_obj, output_obj).ok())
+  {
+    throw std::runtime_error("Failed to change layout");
   }
 }
 
 void ICLTensor::enqueueReadBuffer(void *ptr, bool)
 {
-  float *arr = (float *)ptr;
-  TensorObject output_obj = MakeCpuMemory(absl::MakeSpan(arr, total_size() / 4));
-
   TensorObject input_obj;
 
   if (handle()->GetStorageType() == TensorStorageType::BUFFER)
@@ -148,72 +185,26 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool)
     input_obj = OpenClTexture{handle()->GetMemoryPtr()};
   }
 
-  TensorObjectDef input_def;
-  input_def.dimensions.b = handle()->Batch();
-  input_def.dimensions.h = handle()->Height();
-  input_def.dimensions.w = handle()->Width();
-  input_def.dimensions.c = handle()->Channels();
-  input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
-  input_def.object_def.data_type = handle()->GetDataType();
-  input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
-  input_def.object_def.user_provided = false;
-
-  TensorObjectDef tmp_def;
-  tmp_def.dimensions.b = handle()->Batch();
-  tmp_def.dimensions.h = handle()->Height();
-  tmp_def.dimensions.w = handle()->Width();
-  tmp_def.dimensions.c = handle()->Channels();
-  tmp_def.object_def.data_layout = DataLayout::BHWC;
-  tmp_def.object_def.data_type = DataType::FLOAT32;
-  tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
-  tmp_def.object_def.user_provided = true;
-
-  auto dims = tmp_def.dimensions;
-  const BHWC shape(dims.b, dims.h, dims.w, dims.c);
-  const TensorDescriptor desc{
-    tmp_def.object_def.data_type,
-    ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout),
-    Layout::BHWC};
-  if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+  TensorObject permute_obj;
+  if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
   {
-    throw std::runtime_error("AllocateTensorMemory error.");
-  }
-  TensorObject tmp_obj;
-  if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE)
-  {
-    tmp_obj = OpenClTexture{_cl_memory.memory()};
+    permute_obj = OpenClTexture{_cl_memory.memory()};
   }
   else
   {
-    tmp_obj = OpenClBuffer{_cl_memory.memory()};
+    permute_obj = OpenClBuffer{_cl_memory.memory()};
   }
-  TensorObjectDef output_def = input_def;
-  output_def.dimensions.b = handle()->Batch();
-  output_def.dimensions.h = handle()->Height();
-  output_def.dimensions.w = handle()->Width();
-  output_def.dimensions.c = handle()->Channels();
-  output_def.object_def.data_layout = DataLayout::BHWC;
-  output_def.object_def.data_type = DataType::FLOAT32;
-  output_def.object_def.object_type = ObjectType::CPU_MEMORY;
-  output_def.object_def.user_provided = true;
 
-  _converter_builder = NewConverterBuilder(_environment.get());
-  if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_bhwc).ok())
-  {
-    throw std::runtime_error("MakeConverter<_converter_bhwc> error.");
-  }
-  if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_cpu).ok())
-  {
-    throw std::runtime_error("MakeConverter<_converter_cpu> error.");
-  }
+  TensorObject output_obj =
+    MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements()));
 
-  if (!_converter_bhwc->Convert(input_obj, tmp_obj).ok())
+  if (!_converter_from->Convert(input_obj, permute_obj).ok())
   {
-    throw std::runtime_error("[r] _converter_bhwc Convert error.");
+    throw std::runtime_error("Failed to change layout");
   }
-  if (!_converter_cpu->Convert(tmp_obj, output_obj).ok())
+  if (!_converter_to->Convert(permute_obj, output_obj).ok())
   {
-    throw std::runtime_error("[r] _converter_cpu Convert error.");
+    throw std::runtime_error("Failed to read cl buffer");
   }
 }
 
index 28e905d48b566023a55a0971f43dfa161104e425..b8ad4469fe136b1b637c486768e2c10a2cc92ad8 100644 (file)
 
 #include <backend/ITensor.h>
 
-#include "open_cl/Api.h"
-#include "open_cl/Spi.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/kernels/Converter.h"
-#include "open_cl/Tensor.h"
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+
+#include "TensorBuilderHelper.h"
 
 namespace onert
 {
@@ -43,19 +46,18 @@ public:
   ICLTensor(ICLTensor &&) = default;
   ICLTensor &operator=(ICLTensor &&) = default;
 
-  ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment)
-    : _rank{rank}, _shape{shape}, _environment(environment)
+  ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment,
+            TensorType type)
+    : _rank{rank}, _shape{shape}, _environment(environment), _type(type)
   {
   }
 
 public:
   uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
   size_t total_size() const final { return _shape.num_elements() * sizeof(float); }
-  size_t calcOffset(const ir::Coordinates &coords) const final
+  size_t calcOffset(const ir::Coordinates &) const final
   {
-    // NYI
-    (void)coords;
-    return 0;
+    throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
   }
   ir::Layout layout() const final { return ir::Layout::NHWC; }
   ir::DataType data_type() const final { return ir::DataType::FLOAT32; }
@@ -83,19 +85,24 @@ public:
   void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
   void enqueueReadBuffer(void *ptr, bool blocking = true) final;
 
+  void writeConvertInit();
+  void readConvertInit();
+  TensorType get_type() { return _type; }
+
 public:
-  virtual const Tensor *handle() const = 0;
-  virtual Tensor *handle() = 0;
+  virtual const tflite::gpu::cl::Tensor *handle() const = 0;
+  virtual tflite::gpu::cl::Tensor *handle() = 0;
 
 private:
 protected:
   size_t _rank; // Actual rank (reflects extended rank)
   ir::Shape _shape;
-  std::shared_ptr<Environment> _environment;
-  std::unique_ptr<TensorObjectConverterBuilder> _converter_builder;
-  CLMemory _cl_memory;
-  std::unique_ptr<TensorObjectConverter> _converter_cpu;
-  std::unique_ptr<TensorObjectConverter> _converter_bhwc;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+  TensorType _type;
+  std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder;
+  tflite::gpu::cl::CLMemory _cl_memory;
+  std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
+  std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
 };
 
 } // namespace operand
index 5dfdc7ec5102b25b77ffdaa70ef0b240086766dd..716400c1f14e0beea9024d3ad7508b58bb47e7f7 100644 (file)
 #define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__
 
 #include <backend/IPortableTensor.h>
+#include <ir/DataType.h>
+#include <ir/Padding.h>
+#include <util/CalculateActivationRange.h>
 
 #include <ruy/Shape.h>
 #include <ruy/Types.h>
-#include <iostream>
-#include <ir/DataType.h>
-#include <ir/InternalType.h>
-#include <ir/Padding.h>
 
 #include <limits>
 
 using OperandType = onert::ir::DataType;
+using namespace onert::util;
 
 namespace onert
 {
@@ -79,40 +79,6 @@ inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Ac
   }
 }
 
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
-  if (activation == ir::Activation::RELU)
-  {
-    *activation_min = 0;
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else if (activation == ir::Activation::RELU6)
-  {
-    *activation_min = 0;
-    *activation_max = 6;
-  }
-  else if (activation == ir::Activation::RELU1)
-  {
-    *activation_min = -1;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::SIGMOID)
-  {
-    *activation_min = 0;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::NONE)
-  {
-    *activation_min = std::numeric_limits<T>::lowest();
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else
-  {
-    std::cout << "Unsupported fused activation function." << std::endl;
-  }
-}
-
 nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type);
 
 } // namespace ops
diff --git a/runtime/onert/backend/trix/Backend.h b/runtime/onert/backend/trix/Backend.h
new file mode 100644 (file)
index 0000000..a638397
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BACKEND_H__
+#define __ONERT_BACKEND_TRIX_BACKEND_H__
+
+#include "BackendContext.h"
+#include "Config.h"
+#include "KernelGenerator.h"
+
+#include <backend/Backend.h>
+
+#include <memory>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class Backend : public ::onert::backend::Backend
+{
+public:
+  Backend() : _config{std::make_shared<Config>()} {}
+
+  std::shared_ptr<IConfig> config() const override { return _config; }
+
+  std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override
+  {
+    auto &graph = *data.graph;
+    auto context = std::make_unique<BackendContext>(this, std::move(data));
+    auto tr = std::make_shared<basic::TensorRegistry>();
+    auto tb = std::make_shared<TensorBuilder>(tr);
+    context->tensor_registry = tr;
+    context->tensor_builder = tb;
+    context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context());
+    return context;
+  }
+
+private:
+  std::shared_ptr<IConfig> _config;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BACKEND_H__
diff --git a/runtime/onert/backend/trix/BackendContext.cc b/runtime/onert/backend/trix/BackendContext.cc
new file mode 100644 (file)
index 0000000..e46b11d
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BackendContext.h"
+
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "util/logging.h"
+#include "ir/Index.h"
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include "backend/basic/BackendContextHelpers.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); }
+
+FunctionMap BackendContext::genKernels()
+{
+  FunctionMap ret;
+
+  for (auto op_ind : _data.op_order)
+  {
+    auto fn_seq = kernel_gen->generate(op_ind);
+    ret.emplace_back(op_ind, std::move(fn_seq));
+  }
+
+  basic::initConsts(*this);
+
+  // NOTE For memory optimization, we want to free some operand data
+  const_cast<ir::Graph &>(*_data.graph)
+    .operands()
+    .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); });
+
+  for (auto &it : ret)
+  {
+    auto &fn_seq = it.second;
+    fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); });
+  }
+
+  return ret;
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/BackendContext.h b/runtime/onert/backend/trix/BackendContext.h
new file mode 100644 (file)
index 0000000..c0734c4
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
+#define __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
+
+#include <backend/BackendContext.h>
+#include "TensorBuilder.h"
+#include "KernelGenerator.h"
+#include "DevContext.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class BackendContext : public onert::backend::BackendContext
+{
+public:
+  BackendContext(const Backend *backend, ContextData &&data,
+                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
+                 std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
+    : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
+      tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _dev_context(new DevContext)
+  {
+  }
+
+  ITensorRegistry *genTensors() override;
+  FunctionMap genKernels() override;
+
+  std::shared_ptr<DevContext> dev_context() { return _dev_context; }
+
+public:
+  // TODO Make it private
+  std::shared_ptr<TensorBuilder> tensor_builder;
+  std::shared_ptr<KernelGenerator> kernel_gen;
+
+private:
+  std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__
diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt
new file mode 100644 (file)
index 0000000..5455757
--- /dev/null
@@ -0,0 +1,24 @@
+set(LIB_ONERT_BACKEND_TRIX onert_backend_trix)
+
+nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+if(NOT TRIXEngine_FOUND)
+  return()
+endif(NOT TRIXEngine_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
+
+add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES})
+
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE trix_engine)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_common)
+target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_coverage)
+
+set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES OUTPUT_NAME backend_trix)
+
+if(CMAKE_BUILD_TYPE_LC STREQUAL "release")
+  add_custom_command(TARGET ${LIB_ONERT_BACKEND_TRIX} POST_BUILD
+                     COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_TRIX}>)
+endif()
+
+install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION lib)
diff --git a/runtime/onert/backend/trix/Config.cc b/runtime/onert/backend/trix/Config.cc
new file mode 100644 (file)
index 0000000..c233264
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Config.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+bool Config::initialize() { return true; }
+
+ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; }
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/Config.h b/runtime/onert/backend/trix/Config.h
new file mode 100644 (file)
index 0000000..799047d
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_CONFIG_H__
+#define __ONERT_BACKEND_TRIX_CONFIG_H__
+
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/ITimer.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class Config : public IConfig
+{
+public:
+  std::string id() override { return "trix"; }
+  bool initialize() override;
+  ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override;
+  bool supportPermutation() override { return true; }
+  bool supportDynamicTensor() override { return false; }
+  bool supportFP16() override { return false; }
+
+  std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); }
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_CONFIG_H__
diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h
new file mode 100644 (file)
index 0000000..482932f
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
+
+#include <libnpuhost.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class DevContext
+{
+public:
+  DevContext()
+  {
+    auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
+    if (device_count <= 0)
+    {
+      throw std::runtime_error("Unable to find TRIV2 NPU device");
+    }
+
+    // Use NPU 0 device
+    if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0)
+    {
+      throw std::runtime_error("Failed to get TRIV2 NPU device handle");
+    }
+  }
+
+  ~DevContext()
+  {
+    if (_dev_handle != nullptr)
+    {
+      unregisterNPUmodel_all(_dev_handle);
+      putNPUdevice(_dev_handle);
+    }
+  }
+
+  npudev_h getDev() { return _dev_handle; }
+
+  template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors)
+  {
+    info->num_info = static_cast<uint32_t>(tensors.size());
+
+    for (uint32_t idx = 0; idx < info->num_info; ++idx)
+    {
+      info->info[idx].layout = convertDataLayout(tensors[idx]->layout());
+      info->info[idx].type = convertDataType(tensors[idx]->data_type());
+    }
+  }
+
+  template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors)
+  {
+    buf->num_buffers = static_cast<uint32_t>(tensors.size());
+
+    for (uint32_t idx = 0; idx < buf->num_buffers; ++idx)
+    {
+      buf->bufs[idx].addr = tensors[idx]->buffer();
+      buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size());
+      buf->bufs[idx].type = BUFFER_MAPPED;
+    }
+  }
+
+private:
+  data_layout convertDataLayout(const ir::Layout layout)
+  {
+    switch (layout)
+    {
+      case ir::Layout::NCHW:
+        return DATA_LAYOUT_NCHW;
+      case ir::Layout::NHWC:
+        return DATA_LAYOUT_NHWC;
+      default:
+        throw std::runtime_error("Unknown Layout");
+    }
+  }
+
+  data_type convertDataType(const ir::DataType type)
+  {
+    switch (type)
+    {
+      case ir::DataType::QUANT_UINT8_ASYMM:
+        return DATA_TYPE_QASYMM8;
+      case ir::DataType::QUANT_INT16_SYMM:
+        return DATA_TYPE_QSYMM16;
+      default:
+        throw std::runtime_error("Unsupported data type");
+    }
+  }
+
+private:
+  // NPU device handle
+  // TODO Support multicore npu device
+  npudev_h _dev_handle;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__
diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc
new file mode 100644 (file)
index 0000000..68e6840
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "KernelGenerator.h"
+
+#include "ops/BulkLayer.h"
+
+#include <backend/Backend.h>
+#include <backend/IConfig.h>
+#include <memory>
+#include <util/Utils.h>
+#include <util/logging.h>
+#include <exec/DynamicShapeInferer.h>
+
+#include <stdexcept>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+KernelGenerator::KernelGenerator(const ir::Graph &graph,
+                                 const std::shared_ptr<TensorBuilder> &tensor_builder,
+                                 const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+                                 const std::shared_ptr<DevContext> &dev_context)
+  : basic::KernelGeneratorBase{graph},
+    _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()},
+    _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _dev_context{dev_context}
+{
+  // DO NOTHING
+}
+
+std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind)
+{
+  auto ret = std::make_unique<exec::FunctionSequence>();
+  ret->enableDynamicShapeInferer(false);
+
+  const auto &op = _graph.operations().at(ind);
+  op.accept(*this);
+  ret->append(releaseFunction());
+  return ret;
+}
+
+void KernelGenerator::visit(const ir::operation::Bulk &node)
+{
+  using ir::operation::Bulk;
+
+  std::vector<IPortableTensor *> output_tensors;
+  for (auto &ofm_idx : node.getOutputs())
+    output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx));
+
+  std::vector<const IPortableTensor *> input_tensors;
+  for (auto &ifm_idx : node.getInputs())
+    input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx));
+
+  // parameters
+  const auto binary_path = node.param().binary_path;
+
+  auto fn = std::make_unique<ops::BulkLayer>();
+
+  fn->configure(input_tensors, output_tensors, binary_path, _dev_context);
+
+  _return_fn = std::move(fn);
+}
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/KernelGenerator.h b/runtime/onert/backend/trix/KernelGenerator.h
new file mode 100644 (file)
index 0000000..d87dc69
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
+#define __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
+
+#include "TensorBuilder.h"
+#include "backend/basic/TensorRegistry.h"
+#include "Tensor.h"
+#include "DevContext.h"
+
+#include <backend/basic/KernelGeneratorBase.h>
+#include <ir/Operands.h>
+#include <ir/Operations.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+class KernelGenerator : public basic::KernelGeneratorBase
+{
+public:
+  KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder,
+                  const std::shared_ptr<basic::TensorRegistry> &tensor_reg,
+                  const std::shared_ptr<DevContext> &dev_context);
+
+  std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override;
+
+private:
+  void visit(const ir::operation::Bulk &node) override;
+
+private:
+  const ir::Operands &_ctx;
+  const ir::Operations &_operations_ctx;
+  ir::Layout _current_layout;
+  std::shared_ptr<TensorBuilder> _tensor_builder;
+  std::shared_ptr<basic::TensorRegistry> _tensor_reg;
+  const std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__
diff --git a/runtime/onert/backend/trix/Tensor.h b/runtime/onert/backend/trix/Tensor.h
new file mode 100644 (file)
index 0000000..5138cee
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_TENSOR_H__
+#define __ONERT_BACKEND_TRIX_TENSOR_H__
+
+#include <backend/basic/Tensor.h>
+#include <ir/Data.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+using Tensor = basic::Tensor;
+using ExternalTensor = basic::ExternalTensor;
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_TENSOR_H__
diff --git a/runtime/onert/backend/trix/TensorBuilder.h b/runtime/onert/backend/trix/TensorBuilder.h
new file mode 100644 (file)
index 0000000..ac6ca0f
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
+#define __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
+
+#include <backend/basic/TensorBuilder.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+
+using TensorBuilder = basic::TensorBuilder;
+
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc
new file mode 100644 (file)
index 0000000..71fdf3f
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "BulkLayer.h"
+#include <util/logging.h>
+
+#include <libnpuhost.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr)
+{
+  // DO NOTHING
+}
+
+BulkLayer::~BulkLayer() { free(_meta); }
+
+void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
+                          std::vector<IPortableTensor *> &outputs, std::string binary_path,
+                          const std::shared_ptr<DevContext> &dev_context)
+{
+  _inputs = inputs;
+  _outputs = outputs;
+  _dev_context = dev_context;
+
+  _meta = getNPUmodel_metadata(binary_path.c_str(), false);
+  if (_meta == nullptr)
+  {
+    throw std::runtime_error("Unable to extract the model metadata");
+  }
+
+  generic_buffer model_file;
+  model_file.type = BUFFER_FILE;
+  model_file.filepath = binary_path.c_str();
+  model_file.size = _meta->size;
+
+  if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0)
+  {
+    throw std::runtime_error("Failed to register npu model");
+  }
+}
+
+void BulkLayer::run()
+{
+  int req_id;
+  if (createNPU_request(_dev_context->getDev(), _model_id, &req_id))
+  {
+    throw std::runtime_error("Unable to create NPU request with model id (" +
+                             std::to_string(_model_id) + ")");
+  }
+
+  if (_meta->input_seg_num != _inputs.size())
+  {
+    throw std::runtime_error("input size does not match to model input seg num");
+  }
+
+  if (_meta->output_seg_num != _outputs.size())
+  {
+    throw std::runtime_error("output size does not match to model output seg num");
+  }
+
+  tensors_data_info in_info;
+  tensors_data_info out_info;
+  _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
+  _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
+
+  input_buffers input_buf;
+  output_buffers output_buf;
+  _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs);
+  _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs);
+
+  if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf,
+                         &out_info))
+  {
+    throw std::runtime_error("Unable to create NPU request for model id (" +
+                             std::to_string(_model_id) + ")");
+  }
+
+  if (submitNPU_request(_dev_context->getDev(), req_id))
+  {
+    throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
+                             ")");
+  }
+
+  if (removeNPU_request(_dev_context->getDev(), req_id))
+  {
+    throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
+                             ")");
+  }
+}
+
+void BulkLayer::prepare()
+{
+  // DO NOTHING
+}
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
new file mode 100644 (file)
index 0000000..f7080cc
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+#define __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../DevContext.h"
+
+#include <exec/IFunction.h>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+class BulkLayer : public ::onert::exec::IFunction
+{
+public:
+  BulkLayer();
+  ~BulkLayer();
+
+public:
+  void configure(const std::vector<const IPortableTensor *> &inputs,
+                 std::vector<IPortableTensor *> &outputs, std::string binary_path,
+                 const std::shared_ptr<DevContext> &dev_context);
+
+  void run() override;
+
+  void prepare() override;
+
+private:
+  std::vector<const IPortableTensor *> _inputs;
+  std::vector<IPortableTensor *> _outputs;
+
+  uint32_t _model_id;
+  npubin_meta *_meta;
+  std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
diff --git a/runtime/onert/backend/trix/trix.cc b/runtime/onert/backend/trix/trix.cc
new file mode 100644 (file)
index 0000000..816fb44
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::trix::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
index 5102e32dd0013086dbb3e10f3da4d94bb65bb86d..fe93fccc0378fdf0c967fd3baf98ffaa7fb4584e 100644 (file)
 #ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
 #define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
 
-// duplicated from cpu/ops/OperationUtils.h
+#include <ir/DataType.h>
 #include <ir/InternalType.h>
 #include <ir/Padding.h>
-#include <ir/DataType.h>
+#include <util/CalculateActivationRange.h>
 
 namespace onert
 {
@@ -32,40 +32,7 @@ namespace ops
 {
 
 using OperandType = ir::DataType;
-
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
-  if (activation == ir::Activation::RELU)
-  {
-    *activation_min = 0;
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else if (activation == ir::Activation::RELU6)
-  {
-    *activation_min = 0;
-    *activation_max = 6;
-  }
-  else if (activation == ir::Activation::RELU1)
-  {
-    *activation_min = -1;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::SIGMOID)
-  {
-    *activation_min = 0;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::NONE)
-  {
-    *activation_min = std::numeric_limits<T>::lowest();
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else
-  {
-    throw std::runtime_error{"Unsupported fused activation function"};
-  }
-}
+using namespace onert::util; // CalculateActivationRange
 
 } // namespace ops
 } // namespace xnnpack
index d3ef6d4af34c909a68745f810e132d48cd40d451..10ca8e9fce36f21989d5343469ff61b66e182e90 100644 (file)
@@ -60,6 +60,7 @@ public:
 private:
   void makeLowerInfo(const compiler::BackendResolver &backend_resolver);
   void dumpLowerInfo();
+  void lowerGraph(const ir::Graph &graph, const compiler::CompilerOptions &options);
 
 private:
   ir::Graph _graph;
index e77c308ea2433a963a8b590f32dce9fc5abb1236..0ec0e07119ec0db031f0091ebb84a87da9d31072 100644 (file)
@@ -38,6 +38,7 @@ enum class DataType
   QUANT_INT8_ASYMM = 9,
   QUANT_INT16_ASYMM = 10,
   QUANT_INT8_SYMM_PER_CHANNEL = 11,
+  QUANT_INT16_SYMM = 12,
 };
 
 size_t sizeOfDataType(DataType data_type);
index 0eb45e1ee7f33531e80680abe95d8961d52c4bb0..4602fafec53914c15e21f61785bb9a9735a547b9 100644 (file)
@@ -24,6 +24,7 @@
 #include "ir/operation/BCQGather.h"
 #include "ir/operation/BinaryArithmetic.h"
 #include "ir/operation/BroadcastTo.h"
+#include "ir/operation/Bulk.h"
 #include "ir/operation/Comparison.h"
 #include "ir/operation/Concat.h"
 #include "ir/operation/Conv2D.h"
index f17fdfdd7e57199b9dacf3e940c18fcce78f4c9d..f37d89505cb74404aa0517fec60ab6d29dab93f6 100644 (file)
@@ -27,6 +27,7 @@ OP(BCQFullyConnected)
 OP(BCQGather)
 OP(BinaryArithmetic)
 OP(BroadcastTo)
+OP(Bulk)
 OP(Comparison)
 OP(Concat)
 OP(Conv2D)
diff --git a/runtime/onert/core/include/ir/operation/Bulk.h b/runtime/onert/core/include/ir/operation/Bulk.h
new file mode 100644 (file)
index 0000000..1825f7f
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_IR_OPERATION_BULK_H__
+#define __ONERT_IR_OPERATION_BULK_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
+class Bulk : public Operation
+{
+public:
+  struct Param
+  {
+    std::string binary_path;
+  };
+
+public:
+  Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs, const Param &param);
+
+public:
+  void accept(OperationVisitor &v) const override;
+  OpCode opcode() const final { return OpCode::Bulk; }
+  const Param &param() const { return _param; }
+
+private:
+  Param _param;
+};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __ONERT_IR_OPERATION_BULK_H__
diff --git a/runtime/onert/core/include/util/CalculateActivationRange.h b/runtime/onert/core/include/util/CalculateActivationRange.h
new file mode 100644 (file)
index 0000000..db76f9d
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+#define __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
+
+#include "ir/InternalType.h"
+
+namespace onert
+{
+namespace util
+{
+
+template <typename T>
+void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
+{
+  if (activation == ir::Activation::RELU)
+  {
+    *activation_min = 0;
+    *activation_max = std::numeric_limits<T>::max();
+  }
+  else if (activation == ir::Activation::RELU6)
+  {
+    *activation_min = 0;
+    *activation_max = 6;
+  }
+  else if (activation == ir::Activation::RELU1)
+  {
+    *activation_min = -1;
+    *activation_max = 1;
+  }
+  else if (activation == ir::Activation::SIGMOID)
+  {
+    *activation_min = 0;
+    *activation_max = 1;
+  }
+  else if (activation == ir::Activation::NONE)
+  {
+    *activation_min = std::numeric_limits<T>::lowest();
+    *activation_max = std::numeric_limits<T>::max();
+  }
+  else
+  {
+    throw std::runtime_error{"Unsupported fused activation function."};
+  }
+}
+
+} // namespace util
+} // namespace onert
+
+#endif // __ONERT_UTIL_CALCULATE_ACTIVATION_RANGE_H__
index 93792dd1c429331d9bc8a28b01454bad93f4dd30..6a1d8fceccfbd2420b282bc67f50fd4a8a29f75e 100644 (file)
@@ -64,6 +64,52 @@ std::string getOpBackends(std::unordered_map<ir::OpCode, std::string> &opcode_to
   return opbackends;
 }
 
+void verboseOptions(compiler::CompilerOptions &options)
+{
+  VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
+  VERBOSE(Compiler) << "backend_list             : "
+                    << nnfw::misc::join(options.backend_list.begin(), options.backend_list.end(),
+                                        "/")
+                    << std::endl;
+  VERBOSE(Compiler) << "trace_filepath           : " << options.trace_filepath << std::endl;
+  VERBOSE(Compiler) << "graph_dump_level         : " << options.graph_dump_level << std::endl;
+  VERBOSE(Compiler) << "executor                 : " << options.executor << std::endl;
+  VERBOSE(Compiler) << "manual backend_for_all   : "
+                    << options.manual_scheduler_options.backend_for_all << std::endl;
+  VERBOSE(Compiler) << "manual_scheduler_options : "
+                    << getOpBackends(options.manual_scheduler_options.opcode_to_backend)
+                    << std::endl;
+  VERBOSE(Compiler) << "he_scheduler             : " << options.he_scheduler << std::endl;
+  VERBOSE(Compiler) << "he_profiling_mode        : " << options.he_profiling_mode << std::endl;
+  VERBOSE(Compiler) << "disable_compile          : " << options.disable_compile << std::endl;
+  VERBOSE(Compiler) << "fp16_enable              : " << options.fp16_enable << std::endl
+                    << std::noboolalpha;
+}
+
+void setBackendMap(compiler::ManualSchedulerOptions &ms_options, const ir::Subgraphs &subgs,
+                   const std::string &str)
+{
+  // TODO Support multiple subgraphs for manual scheduling
+  auto key_val_list = nnfw::misc::split(str, ';');
+  for (const auto &key_val_str : key_val_list)
+  {
+    if (key_val_str.empty())
+    {
+      continue;
+    }
+
+    auto key_val = nnfw::misc::split(key_val_str, '=');
+    const auto &key_str = key_val.at(0);
+    const auto &val = key_val.at(1);
+    auto key = static_cast<uint32_t>(std::stoi(key_str));
+
+    subgs.at(ir::SubgraphIndex{0})
+      ->operations()
+      .at(ir::OperationIndex{key}); // Check if it exists, or this will throw
+    ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
+  }
+}
+
 } // namespace
 
 namespace onert
@@ -104,26 +150,8 @@ CompilerOptions fetchCompilerOptionsFromGlobalConfig(const ir::Subgraphs &subgs)
 #undef OP
 
     // Index to Backend
-    // TODO Support multiple subgraphs for manual scheduling
     auto map_str = util::getConfigString(util::config::OP_BACKEND_MAP);
-    auto key_val_list = nnfw::misc::split(map_str, ';');
-    for (const auto &key_val_str : key_val_list)
-    {
-      if (key_val_str.empty())
-      {
-        continue;
-      }
-
-      auto key_val = nnfw::misc::split(key_val_str, '=');
-      const auto &key_str = key_val.at(0);
-      const auto &val = key_val.at(1);
-      auto key = static_cast<uint32_t>(std::stoi(key_str));
-
-      subgs.at(ir::SubgraphIndex{0})
-        ->operations()
-        .at(ir::OperationIndex{key}); // Check if exist, or this wil throw
-      ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
-    }
+    setBackendMap(ms_options, subgs, map_str);
   }
   return options;
 }
@@ -143,22 +171,10 @@ void Compiler::enableToFp16() { _options.fp16_enable = true; }
 
 void Compiler::set_backend_from_str(const char *backend_settings)
 {
+  assert(_subgraphs != nullptr);
   // Backend for all
   auto &ms_options = _options.manual_scheduler_options;
-  auto key_val_list = nnfw::misc::split(backend_settings, ';');
-  for (const auto &key_val_str : key_val_list)
-  {
-    if (key_val_str.empty())
-    {
-      continue;
-    }
-
-    auto key_val = nnfw::misc::split(key_val_str, '=');
-    const auto &key_str = key_val.at(0);
-    const auto &val = key_val.at(1);
-    auto key = static_cast<uint32_t>(std::stoi(key_str));
-    ms_options.index_to_backend.emplace(ir::OperationIndex{key}, val);
-  }
+  setBackendMap(ms_options, *_subgraphs, std::string{backend_settings});
 }
 
 void Compiler::checkProfilerConditions()
@@ -344,26 +360,7 @@ std::shared_ptr<exec::ExecutorMap> Compiler::compile(void)
     _options.manual_scheduler_options.opcode_to_backend[ir::OpCode::BCQGather] = "bcq";
   }
 
-  {
-    VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
-    VERBOSE(Compiler) << "backend_list             : "
-                      << nnfw::misc::join(_options.backend_list.begin(),
-                                          _options.backend_list.end(), "/")
-                      << std::endl;
-    VERBOSE(Compiler) << "trace_filepath           : " << _options.trace_filepath << std::endl;
-    VERBOSE(Compiler) << "graph_dump_level         : " << _options.graph_dump_level << std::endl;
-    VERBOSE(Compiler) << "executor                 : " << _options.executor << std::endl;
-    VERBOSE(Compiler) << "manual backend_for_all   : "
-                      << _options.manual_scheduler_options.backend_for_all << std::endl;
-    VERBOSE(Compiler) << "manual_scheduler_options : "
-                      << getOpBackends(_options.manual_scheduler_options.opcode_to_backend)
-                      << std::endl;
-    VERBOSE(Compiler) << "he_scheduler             : " << _options.he_scheduler << std::endl;
-    VERBOSE(Compiler) << "he_profiling_mode        : " << _options.he_profiling_mode << std::endl;
-    VERBOSE(Compiler) << "disable_compile          : " << _options.disable_compile << std::endl;
-    VERBOSE(Compiler) << "fp16_enable              : " << _options.fp16_enable << std::endl
-                      << std::noboolalpha;
-  }
+  verboseOptions(_options);
 
   _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
     // Mandatory passes
@@ -544,26 +541,7 @@ std::vector<std::shared_ptr<exec::ExecutorMap>> Compiler::compile(const char *pa
     _options.tracing_ctx = nullptr;
   }
 
-  {
-    VERBOSE(Compiler) << std::boolalpha << "==== Compiler Options ====" << std::endl;
-    VERBOSE(Compiler) << "backend_list             : "
-                      << nnfw::misc::join(_options.backend_list.begin(),
-                                          _options.backend_list.end(), "/")
-                      << std::endl;
-    VERBOSE(Compiler) << "trace_filepath           : " << _options.trace_filepath << std::endl;
-    VERBOSE(Compiler) << "graph_dump_level         : " << _options.graph_dump_level << std::endl;
-    VERBOSE(Compiler) << "executor                 : " << _options.executor << std::endl;
-    VERBOSE(Compiler) << "manual backend_for_all   : "
-                      << _options.manual_scheduler_options.backend_for_all << std::endl;
-    VERBOSE(Compiler) << "manual_scheduler_options : "
-                      << getOpBackends(_options.manual_scheduler_options.opcode_to_backend)
-                      << std::endl;
-    VERBOSE(Compiler) << "he_scheduler             : " << _options.he_scheduler << std::endl;
-    VERBOSE(Compiler) << "he_profiling_mode        : " << _options.he_profiling_mode << std::endl;
-    VERBOSE(Compiler) << "disable_compile          : " << _options.disable_compile << std::endl;
-    VERBOSE(Compiler) << "fp16_enable              : " << _options.fp16_enable << std::endl
-                      << std::noboolalpha;
-  }
+  verboseOptions(_options);
 
   _subgraphs->iterate([&](const ir::SubgraphIndex &, ir::Graph &subg) {
     // Mandatory passes
index ba038e93599396f75a9f494d1eae9f66e9a4d629..f9db1ca890229d93fea29f5030ecdb991be1bdc6 100644 (file)
 
 #include "ExecutorFactory.h"
 
-#include <deque>
-#include <functional>
-#include "ir/OperationCloner.h"
-#include "exec/ExecutionObservers.h"
-#include "exec/LinearExecutor.h"
-#include "exec/DataflowExecutor.h"
-#include "exec/ParallelExecutor.h"
-#include "compiler/BackendManager.h"
-#include "compiler/ExecutionBuilder.h"
-#include "exec/ExecTime.h"
-#include "compiler/Linear.h"
-#include "compiler/BackendManager.h"
-#include "backend/IPortableTensor.h"
 #include "backend/builtin/Config.h"
 #include "backend/builtin/KernelGenerator.h"
-#include "backend/builtin/UserTensor.h"
 #include "backend/builtin/TensorBuilder.h"
-#include "util/TracingCtx.h"
+#include "backend/builtin/UserTensor.h"
+#include "backend/IPortableTensor.h"
+#include "compiler/BackendManager.h"
+#include "compiler/BackendManager.h"
+#include "compiler/ExecutionBuilder.h"
+#include "compiler/Linear.h"
 #include "dumper/text/GraphDumper.h"
+#include "exec/DataflowExecutor.h"
+#include "exec/ExecTime.h"
+#include "exec/ExecutionObservers.h"
+#include "exec/LinearExecutor.h"
+#include "exec/ParallelExecutor.h"
+#include "ir/OperationCloner.h"
+#include "util/TracingCtx.h"
 
+#include <functional>
 #include <memory>
 
 namespace onert
@@ -282,6 +281,42 @@ void ExecutorFactory::prepareMigrantTensors(compiler::LoweredGraph &lowered_grap
     });
 }
 
+void ExecutorFactory::prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+                                            const std::shared_ptr<exec::ExecutorMap> &executor_map,
+                                            const backend::BackendContexts &backend_contexts)
+{
+  for (auto &pair : backend_contexts)
+  {
+    auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
+    if (builtin_context != nullptr)
+    {
+      auto builtin_kernel_gen = builtin_context->kernel_gen;
+      builtin_kernel_gen->setTensorRegistries(tensor_regs);
+      builtin_kernel_gen->setExecutorMap(executor_map);
+    }
+  }
+}
+
+std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+ExecutorFactory::orderBackendContext(const backend::BackendContexts &backend_contexts)
+{
+  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
+
+  for (auto &pair : backend_contexts)
+  {
+    // NOTE builtin backend must be processed last.
+    // This is because of Permute layer's specialty which is the only operation that could have
+    // different ITensor objects for the input and the output. And it requires all other backends'
+    // tensors are ready to use.
+    if (pair.first->config()->id() == "builtin")
+      ordered_contexts.emplace_back(pair.first, pair.second.get());
+    else
+      ordered_contexts.emplace_front(pair.first, pair.second.get());
+  }
+
+  return ordered_contexts;
+}
+
 exec::IExecutor *
 ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                                       const compiler::CompilerOptions &options,
@@ -311,32 +346,12 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
   prepareMigrantTensors(*lowered_graph, backend_contexts);
 
   // Give some runtime objects to builtin KernelGenerator
-  for (auto &pair : backend_contexts)
-  {
-    auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
-    if (builtin_context != nullptr)
-    {
-      auto builtin_kernel_gen = builtin_context->kernel_gen;
-      builtin_kernel_gen->setTensorRegistries(tensor_regs);
-      builtin_kernel_gen->setExecutorMap(executor_map);
-    }
-  }
+  prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
 
   ExecutionBuilder builder;
 
   // Adjust the order of backends for the upcoming iteration
-  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
-  for (auto &pair : backend_contexts)
-  {
-    // NOTE builtin backend must be processed lastly.
-    // This is because of Permute layer's specialty which is the only operation that could have
-    // different ITensor objects for the input and the output. And it requires all other backends'
-    // tensors are ready to use.
-    if (pair.first->config()->id() == "builtin")
-      ordered_contexts.emplace_back(pair.first, pair.second.get());
-    else
-      ordered_contexts.emplace_front(pair.first, pair.second.get());
-  }
+  auto ordered_contexts = orderBackendContext(backend_contexts);
 
   // Simulate the execution for deallocation of tensors
   std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
@@ -447,32 +462,12 @@ exec::IExecutor *ExecutorFactory::createDataflowExecutor(
   prepareMigrantTensors(*lowered_graph, backend_contexts);
 
   // Give some runtime objects to builtin KernelGenerator
-  for (auto &pair : backend_contexts)
-  {
-    auto builtin_context = dynamic_cast<backend::builtin::BackendContext *>(pair.second.get());
-    if (builtin_context != nullptr)
-    {
-      auto builtin_kernel_gen = builtin_context->kernel_gen;
-      builtin_kernel_gen->setTensorRegistries(tensor_regs);
-      builtin_kernel_gen->setExecutorMap(executor_map);
-    }
-  }
+  prepareBuiltinBackend(tensor_regs, executor_map, backend_contexts);
 
   ExecutionBuilder builder;
 
   // Adjust the order of backends for the upcoming iteration
-  std::deque<std::pair<const backend::Backend *, backend::BackendContext *>> ordered_contexts;
-  for (auto &pair : backend_contexts)
-  {
-    // NOTE builtin backend must be processed lastly.
-    // This is because of Permute layer's specialty which is the only operation that could have
-    // different ITensor objects for the input and the output. And it requires all other backends'
-    // tensors are ready to use.
-    if (pair.first->config()->id() == "builtin")
-      ordered_contexts.emplace_back(pair.first, pair.second.get());
-    else
-      ordered_contexts.emplace_front(pair.first, pair.second.get());
-  }
+  auto ordered_contexts = orderBackendContext(backend_contexts);
 
   // Generate kernels
   for (auto &pair : ordered_contexts)
index 5fe1617a61d432776755259ba4f7ec09c9969b19..2ee05fae36fb7abaaf02867e5b30030d7566fb11 100644 (file)
 #ifndef __ONERT_COMPILER_EXECUTOR_FACTORY_H__
 #define __ONERT_COMPILER_EXECUTOR_FACTORY_H__
 
-#include <unordered_map>
+#include "TensorRegistries.h"
 
 #include "backend/ITensor.h"
-#include "exec/IExecutor.h"
 #include "compiler/LoweredGraph.h"
-#include "TensorRegistries.h"
+#include "exec/IExecutor.h"
+
+#include <deque>
+#include <unordered_map>
 
 namespace onert
 {
@@ -45,6 +47,12 @@ private:
 private:
   static void prepareMigrantTensors(compiler::LoweredGraph &lowered_graph,
                                     const backend::BackendContexts &backend_contexts);
+  static void prepareBuiltinBackend(const TensorRegistries &tensor_regs,
+                                    const std::shared_ptr<exec::ExecutorMap> &executor_map,
+                                    const backend::BackendContexts &backend_contexts);
+  static std::deque<std::pair<const backend::Backend *, backend::BackendContext *>>
+  orderBackendContext(const backend::BackendContexts &backend_contexts);
+
   static exec::IExecutor *
   createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lowered_graph,
                        const compiler::CompilerOptions &options,
index 3b84d02de7906e462be75f1a2a7f02b4bef92beb..999bffa7c42fee4dbb8b18e74519c74a1390afbf 100644 (file)
@@ -42,84 +42,18 @@ namespace compiler
 
 LoweredGraph::LoweredGraph(const ir::Graph &graph, const CompilerOptions &options) : _graph{graph}
 {
-  // set tracing_ctx for copied graph
-  if (options.tracing_ctx)
-  {
-    auto subgraph_index = options.tracing_ctx->getSubgraphIndex(&graph);
-    options.tracing_ctx->setSubgraphIndex(&_graph, subgraph_index.value());
-  }
-
-  // Build backend contexts
-  auto &backend_manager = BackendManager::get();
-  // Create contexts for other backends
-  for (auto backend_str : options.backend_list)
-  {
-    backend_manager.loadBackend(backend_str);
-    auto backend = backend_manager.get(backend_str);
-
-    // TODO As the default value of backend list contains "cpu", "acl_cl" and "acl_neon", and some
-    // are not available on x64 or some other platforms. So this may be a workaround for x64 and
-    // we should change it back(throw if backend is not loaded) later.
-    if (!backend)
-    {
-      VERBOSE(LoweredGraph) << "Cannot load backend - " << backend_str << std::endl;
-      continue;
-    }
-  }
-  if (backend_manager.num_backends() == 0)
-    throw std::runtime_error{"No available backends loaded."};
-
-  // TODO Move "schedule" phase out of here
-  // Schedule
-  std::unique_ptr<BackendResolver> backend_resolver;
-  auto all_backends = backend_manager.getAll();
-  if (options.he_scheduler)
-  {
-    auto scheduler = HEScheduler(all_backends, options);
-    backend_resolver = scheduler.schedule(_graph);
-    _indexed_ranks = scheduler.getIndexedRanks();
-  }
-  else
-  {
-    auto scheduler = ManualScheduler(all_backends, options);
-    backend_resolver = scheduler.schedule(_graph);
-  }
-
-  makeLowerInfo(*backend_resolver);
-  VERBOSE(LoweredGraph) << "dump before mandatory passes" << std::endl;
-  dumper::text::dumpLoweredGraph(*this);
-
-  // Mandatory passes - kind of legalization(?)
-  pass::PassRunner{}
-    .append(std::make_unique<pass::ConstantInsertionPass>(*this))
-    .append(std::make_unique<pass::ConstantLoweringPass>(*this))
-    .append(std::make_unique<pass::PermutationOperationPass>(*this))
-    .append(std::make_unique<pass::PermutationInsertionPass>(*this))
-    .run();
-
-  dumpLowerInfo();
-
-  // Optimization passes (optional)
-  pass::PassRunner{}.append(std::make_unique<pass::PermutationEliminationPass>(*this)).run();
-
-  VERBOSE(LoweredGraph) << "Dump after all the passes" << std::endl;
-  for (auto operand : _graph.getInputs())
-    VERBOSE(LoweredGraph) << "Graph Input : " << operand << std::endl;
-  for (auto operand : _graph.getOutputs())
-    VERBOSE(LoweredGraph) << "Graph Output : " << operand << std::endl;
-  dumper::text::dumpLoweredGraph(*this);
-
-  // Graph verifications
-  {
-    assert(ir::verifier::InputOutputChecker().verify(_graph));
-    assert(ir::verifier::DAGChecker().verify(_graph));
-    assert(ir::verifier::EdgeChecker().verify(_graph));
-  }
+  lowerGraph(graph, options);
 }
 
+// TODO Design better class and constructor to represent parent_graph
 LoweredGraph::LoweredGraph(const ir::Graph &parent_graph, const ir::Graph &graph,
                            const CompilerOptions &options)
   : _graph{graph}, _parent_graph{parent_graph}
+{
+  lowerGraph(graph, options);
+}
+
+void LoweredGraph::lowerGraph(const ir::Graph &graph, const CompilerOptions &options)
 {
   // set tracing_ctx for copied graph
   if (options.tracing_ctx)
index 8e343cffa747eacbca8fcf9e4f3b509658c6143c..eb54b67ae863a381da4f73ed6ba3248430153fdf 100644 (file)
@@ -145,6 +145,9 @@ protected:
       case ir::DataType::INT64:
         permute<int64_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
         break;
+      case ir::DataType::QUANT_INT16_SYMM:
+        permute<int16_t>(src_tensor, dst_tensor, rank, src_offsets, dst_offsets);
+        break;
       default:
         throw std::runtime_error("IPermuteFunction: Not supported data type");
         break;
@@ -338,6 +341,8 @@ protected:
       case ir::DataType::QUANT_INT8_ASYMM:
       case ir::DataType::QUANT_INT8_SYMM:
         return typeid(int8_t);
+      case ir::DataType::QUANT_INT16_SYMM:
+        return typeid(int16_t);
       default:
         throw std::runtime_error("IPermuteFunction: Not supported data type");
     }
index 8e75c4f53320deb44e53822a4c609bba22d52e38..07670c720814ba91af2bbb5e95d814c0c59bcd3a 100644 (file)
@@ -50,6 +50,8 @@ size_t sizeOfDataType(DataType data_type)
       return sizeof(int64_t);
     case DataType::QUANT_INT16_ASYMM:
       return sizeof(int16_t);
+    case DataType::QUANT_INT16_SYMM:
+      return sizeof(int16_t);
     default:
       throw std::runtime_error{"Unsupported type size"};
   }
index 80e2a3f7ae674ce1677ceaa96f97b5e306381d07..0b596ff132df3bd414fb5bc745e2b2544b63dced 100644 (file)
@@ -29,19 +29,21 @@ using namespace operation;
 
 namespace
 {
-void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
+
+// Dump all input and output.
+// Use this function when there is no special input or(and) output.
+void dumpOpGeneric(const Operation &node, const std::string &adding_input = "")
 {
   VERBOSE(LIR) << "* " << node.name() << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
-               << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
+  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs() << ") " << adding_input << std::endl;
+  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs() << ")" << std::endl;
 }
 
-void dumpBinaryInputOp(const Operation &node, const std::string &adding_input = "")
+void dumpUnaryInputOp(const Operation &node, const std::string &adding_input = "")
 {
   VERBOSE(LIR) << "* " << node.name() << std::endl;
-  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(0) << ", " << node.getInputs().at(1)
-               << ") " << adding_input << std::endl;
+  VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(0) << ") " << adding_input
+               << std::endl;
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
@@ -53,18 +55,6 @@ void dumpConvOp(const Operation &node, const std::string &padding_type)
                << node.getInputs().at(Conv2D::Input::BIAS) << ")" << std::endl;
   VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
-
-void dumpPackingOp(const Operation &node)
-{
-  VERBOSE(LIR) << "* " << node.name() << std::endl;
-  std::string inputs;
-  for (auto i : node.getInputs())
-  {
-    inputs += std::to_string(i.value()) + ",";
-  }
-  VERBOSE(LIR) << "  - Inputs : Inputs(" << inputs << ")" << std::endl;
-  VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
-}
 } // namespace
 
 OperationDumper::OperationDumper(const std::string &start_msg)
@@ -86,7 +76,7 @@ void OperationDumper::visit(const BatchToSpaceND &node)
   std::string block_size =
     "BlockSize(" + std::to_string(node.getInputs().at(BatchToSpaceND::Input::BLOCK_SIZE).value()) +
     ")";
-  dumpUnaryInputOp(node, block_size);
+  dumpOpGeneric(node, block_size);
 }
 
 void OperationDumper::visit(const BCQFullyConnected &node)
@@ -103,13 +93,13 @@ void OperationDumper::visit(const BCQFullyConnected &node)
   VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const BinaryArithmetic &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const BinaryArithmetic &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const operation::BroadcastTo &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const operation::BroadcastTo &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const Comparison &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Comparison &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const Concat &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Concat &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const Conv2D &node)
 {
@@ -118,11 +108,11 @@ void OperationDumper::visit(const Conv2D &node)
   dumpConvOp(node, padding_type);
 }
 
-void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp16ToFp32 &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ConvertFp32ToFp16 &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const DepthToSpace &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const DepthToSpace &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const DepthwiseConv2D &node)
 {
@@ -143,12 +133,12 @@ void OperationDumper::visit(const ElementwiseActivation &node)
   {
     params = " alpha value(" + std::to_string(node.param().alpha) + ")";
   }
-  dumpUnaryInputOp(node, params);
+  dumpOpGeneric(node, params);
 }
 
-void OperationDumper::visit(const ElementwiseBinary &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseBinary &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const ElementwiseUnary &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ElementwiseUnary &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const EmbeddingLookup &node)
 {
@@ -208,9 +198,9 @@ void OperationDumper::visit(const InstanceNorm &node)
   dumpUnaryInputOp(node, inputs);
 }
 
-void OperationDumper::visit(const L2Normalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const L2Normalization &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const LocalResponseNormalization &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const LocalResponseNormalization &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const LSTM &node)
 {
@@ -258,7 +248,7 @@ void OperationDumper::visit(const LSTM &node)
                << node.getOutputs().at(LSTM::Output::OUTPUT) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Pack &node) { dumpPackingOp(node); }
+void OperationDumper::visit(const Pack &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const Pad &node)
 {
@@ -297,16 +287,16 @@ void OperationDumper::visit(const Pool2D &node)
   VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Pow &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Pow &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const PReLU &node)
 {
   std::string alpha =
     "Alpha(" + std::to_string(node.getInputs().at(PReLU::Input::ALPHA).value()) + ")";
-  dumpUnaryInputOp(node, alpha);
+  dumpOpGeneric(node, alpha);
 }
 
-void OperationDumper::visit(const Rank &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Rank &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const Reduce &node) { dumpUnaryInputOp(node); }
 
@@ -320,37 +310,9 @@ void OperationDumper::visit(const Reshape &node)
   dumpUnaryInputOp(node, shape);
 }
 
-void OperationDumper::visit(const ResizeBilinear &node)
-{
-  if (node.getInputs().size() == 1)
-  {
-    dumpUnaryInputOp(node);
-  }
-  else if (node.getInputs().size() == 2)
-  {
-    dumpBinaryInputOp(node);
-  }
-  else
-  {
-    VERBOSE(LIR) << "* " << node.name() << " is set wrong" << std::endl;
-  }
-}
+void OperationDumper::visit(const ResizeBilinear &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const ResizeNearestNeighbor &node)
-{
-  if (node.getInputs().size() == 1)
-  {
-    dumpUnaryInputOp(node);
-  }
-  else if (node.getInputs().size() == 2)
-  {
-    dumpBinaryInputOp(node);
-  }
-  else
-  {
-    VERBOSE(LIR) << "* " << node.name() << " is set wrong" << std::endl;
-  }
-}
+void OperationDumper::visit(const ResizeNearestNeighbor &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const Reverse &node)
 {
@@ -391,9 +353,9 @@ void OperationDumper::visit(const Select &node)
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const ir::operation::Shape &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const ir::operation::Shape &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const Softmax &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Softmax &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const SpaceToBatchND &node)
 {
@@ -404,11 +366,11 @@ void OperationDumper::visit(const SpaceToBatchND &node)
   dumpUnaryInputOp(node, inputs);
 }
 
-void OperationDumper::visit(const SpaceToDepth &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const SpaceToDepth &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const Split &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Split &node) { dumpOpGeneric(node); }
 
-void OperationDumper::visit(const SquaredDifference &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const SquaredDifference &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const StatelessRandomUniform &node)
 {
@@ -419,7 +381,7 @@ void OperationDumper::visit(const StatelessRandomUniform &node)
   VERBOSE(LIR) << "  - Output : Output(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Squeeze &node) { dumpUnaryInputOp(node); }
+void OperationDumper::visit(const Squeeze &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const Slice &node) { dumpUnaryInputOp(node); }
 
@@ -454,22 +416,14 @@ void OperationDumper::visit(const TransposeConv &node)
   VERBOSE(LIR) << "  - Output : OFM(" << node.getOutputs().at(0) << ")" << std::endl;
 }
 
-void OperationDumper::visit(const Transpose &node) { dumpBinaryInputOp(node); }
+void OperationDumper::visit(const Transpose &node) { dumpOpGeneric(node); }
 
 void OperationDumper::visit(const Unpack &node)
 {
   VERBOSE(LIR) << "* " << node.name() << std::endl;
   VERBOSE(LIR) << "  - Inputs : Input(" << node.getInputs().at(Unpack::Input::INPUT) << ")"
                << std::endl;
-  std::string outputs;
-  const auto &output_indices = node.getOutputs();
-  for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
-  {
-    outputs += std::to_string(it->value());
-    if (std::next(it) != std::end(output_indices))
-      outputs += ", ";
-  }
-  VERBOSE(LIR) << "  - Outputs : Outputs(" << outputs << ")" << std::endl;
+  VERBOSE(LIR) << "  - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
 }
 
 void OperationDumper::visit(const OneHot &node)
@@ -483,51 +437,21 @@ void OperationDumper::visit(const OneHot &node)
 void OperationDumper::visit(const If &node)
 {
   VERBOSE(LIR) << "* " << node.name() << std::endl;
-  std::string inputs;
-  const auto &input_indices = node.getInputs();
-  for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
-  {
-    inputs += std::to_string(it->value());
-    if (std::next(it) != std::end(input_indices))
-      inputs += ", ";
-  }
   VERBOSE(LIR) << "  - Inputs : "
                << "Then subgraph (" << node.param().then_subg_index << ") Else subgraph ("
-               << node.param().else_subg_index << ") Inputs(" << inputs << ")" << std::endl;
-  std::string outputs;
-  const auto &output_indices = node.getOutputs();
-  for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
-  {
-    outputs += std::to_string(it->value());
-    if (std::next(it) != std::end(output_indices))
-      outputs += ", ";
-  }
-  VERBOSE(LIR) << "  - Output : Outputs(" << outputs << ")" << std::endl;
+               << node.param().else_subg_index << ") Inputs(" << node.getInputs() << ")"
+               << std::endl;
+  VERBOSE(LIR) << "  - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
 }
 
 void OperationDumper::visit(const While &node)
 {
   VERBOSE(LIR) << "* " << node.name() << std::endl;
-  std::string inputs;
-  const auto &input_indices = node.getInputs();
-  for (auto it = std::begin(input_indices); it != std::end(input_indices); ++it)
-  {
-    inputs += std::to_string(it->value());
-    if (std::next(it) != std::end(input_indices))
-      inputs += ", ";
-  }
   VERBOSE(LIR) << "  - Inputs : "
                << "Cond subgraph (" << node.param().cond_subg_index << ") Body subgraph ("
-               << node.param().body_subg_index << ") Inputs(" << inputs << ")" << std::endl;
-  std::string outputs;
-  const auto &output_indices = node.getOutputs();
-  for (auto it = std::begin(output_indices); it != std::end(output_indices); ++it)
-  {
-    outputs += std::to_string(it->value());
-    if (std::next(it) != std::end(output_indices))
-      outputs += ", ";
-  }
-  VERBOSE(LIR) << "  - Output : Outputs(" << outputs << ")" << std::endl;
+               << node.param().body_subg_index << ") Inputs(" << node.getInputs() << ")"
+               << std::endl;
+  VERBOSE(LIR) << "  - Output : Outputs(" << node.getOutputs() << ")" << std::endl;
 }
 
 } // namespace ir
index 110aeebe77c36092a8a7ae4d0a5363ee2353382f..a51e12dff020da0fab386db8e2ece0edb9335df9 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/AddN.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index e918d27ae76ff3e0bc652f94456e2d8aab88ac2a..ccda674ad616b6a4d9b0fe29dd46b09ccc8eb411 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/BCQFullyConnected.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index f9dfaa3f6d6920158396ad458862aa7890fb2a7c..1ca5b0c9fd2931370bc80d0fbe9beb1c913f5da0 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/BCQGather.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index e58e0f486e369525cf920a7cfd029b619e929dae..3c5578ac436616e247bdcaaf99ac1608441867f2 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/BatchToSpaceND.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 2d439194f448007c79df97e607e0ea70820a0f91..5eb3fc3d7212975b72900107eab8a277b236aea0 100644 (file)
  */
 
 #include "ir/operation/BinaryArithmetic.h"
+#include "ir/OperationVisitor.h"
 
-#include <cassert>
 #include <unordered_map>
 
-#include "ir/OperationVisitor.h"
-
 namespace onert
 {
 namespace ir
index 5da7b5abc0333a674280d7b0a3717891074edb17..eab6c0611693c46f113f86df798fbf6c8986f791 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/BroadcastTo.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
diff --git a/runtime/onert/core/src/ir/operation/Bulk.cc b/runtime/onert/core/src/ir/operation/Bulk.cc
new file mode 100644 (file)
index 0000000..4b96c9d
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/Bulk.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+void Bulk::accept(OperationVisitor &v) const { v.visit(*this); }
+
+Bulk::Bulk(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
+           const Bulk::Param &param)
+  : Operation{OperandConstraint::createAny(), inputs, outputs}, _param{param}
+{
+}
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
index 94c96ff699c3fe5b2f9182a242045efdfd25d955..33365657cf285176532986ce469581ca80f3ecf1 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Comparison.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 5d99debb75766892d9546237aaede59ce13a5a62..3a21e36f2efcf658b07769ffcb502efcc0ae8b65 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Concat.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 725f3e70b3757d6f6770cb311c0050b8f098d59b..d615ae416703071fbda833f0dc777c71168a5e51 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Conv2D.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 822eb30a96818249d567c0eb30523831476a5b5d..365745ea86fc8e90a092da20a6cefdb097f68d40 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/ConvertFp16ToFp32.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 5e5b42f3bb1d93957b348fe8002e27fa248c493d..d4fc7031c4f1b76932d4b593f423a47057923a79 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/ConvertFp32ToFp16.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 197c7ee482bbb605efe4c0e934263c34a2370a7a..e3edea777ef7406ccf871e3acb846118d1461383 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/DepthToSpace.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index bef75c5cfe171b3a15da81083ac615cc84dd73b6..0e7137306ac694a0dafa1ab6f632530edfaca466 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/DepthwiseConv2D.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index f3e942f7deba3bba29c9726b82b5e6a870d6cb98..e83c26e28887c1185c51a0712632808e9d29b3e9 100644 (file)
  */
 
 #include "ir/operation/ElementwiseActivation.h"
+#include "ir/OperationVisitor.h"
 
-#include <cassert>
 #include <unordered_map>
 
-#include "ir/OperationVisitor.h"
-
 namespace onert
 {
 namespace ir
index 155b660dcaf831cc0825b6ae56ad65f7cd8c8370..b22bed7bca28b28cc3ec030670496181ac72a320 100644 (file)
  */
 
 #include "ir/operation/ElementwiseBinary.h"
+#include "ir/OperationVisitor.h"
 
-#include <cassert>
 #include <unordered_map>
 
-#include "ir/OperationVisitor.h"
-
 namespace onert
 {
 namespace ir
index c21c51c05a06cea888b22d6d925e9ebcdd7bfb61..fd463e0fe18c18e9ae0d022f3559a7f9bba46643 100644 (file)
  */
 
 #include "ir/operation/ElementwiseUnary.h"
+#include "ir/OperationVisitor.h"
 
-#include <cassert>
 #include <unordered_map>
 
-#include "ir/OperationVisitor.h"
-
 namespace onert
 {
 namespace ir
index e236747063a7cd8e7c5562366aa296ebe98b86ec..66b80b2c5ca99ad61ea3464fba805e2470e2b1e4 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/EmbeddingLookup.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 50e3636f32aa24df6b385387631633827620ba3f..e421bc3831292074e3f1f30db94346dd14945218 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/ExpandDims.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 4a13737d446acf639f34821323e8888d2fb47280..60355c60903930ed441764c685a751fd22c37ebe 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Fill.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 335b7b209e8f6f79d46ed8e7720636efd86037f4..3533df0979902454107a4a2bfe526a7cd9ef7315 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/FullyConnected.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 96a39b3f2f96c08eb899370d5f371b115b1b8a6c..e0c4630a0a44b497ccd5fcaf8f119f66c70bf5e0 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Gather.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 2974679d41594dabc1feb04c1cee359dfe59fb51..5d1589cd16f7eedcc70df1ed5bf155e863148c3a 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/HashtableLookup.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index d9af9d0b7813a08b6fa1d16bfaaa6568c11954ec..9fb55383ef9bf26075a9523ee4506169b0ba818a 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/InstanceNorm.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 0184ef628e3a6ca351467ed4bab6b23e41f58397..6725df5961938c8d7d3ab55ca91f64cc00f92e34 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/L2Normalization.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 45a1fd1206c91a2b458afdbf47728613b5b69e52..06e66158b095447d2dc8abc319140702c7db3f4b 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/LSTM.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 52037cc72e8c77b5dee77bbed45cbde8556a3fed..73fca9938248d021126acb461567d761d8408fbd 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/LocalResponseNormalization.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 51f6a6c5d5bc5f3474afe66e21de33b017b58fa6..d580e63e18136df1d6c36f2534de82f72d5b7e83 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/LogSoftmax.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 6046e36fe3a997ad1ca07b9e09800bb60c2f6a78..e52bddc1f9302a4f55bd98b290f866372314751d 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/MatrixBandPart.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 5ed31c2b9d3636526648768630e5d648ad8a21b4..87bd12e60e19836fafda6296cfcfb210b37507de 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/PReLU.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 571965de89f9a4e683c642e404b6b7541fd1a09f..813fbaf30adaffbe9e6ebb867d1e93077ee79997 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Permute.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index cbb42a80a7397b1aeb699afc2fdcd46e172aa15f..e32b876e652de098b37b60a57a3e2b48e4e58841 100644 (file)
  */
 
 #include "ir/operation/Pool2D.h"
+#include "ir/OperationVisitor.h"
 
-#include <cassert>
 #include <unordered_map>
 
-#include "ir/OperationVisitor.h"
-
 namespace onert
 {
 namespace ir
index f1df54c60cf1b488bfb4bb1e70d22e8ba3aca185..f7c159a125337a1dcfce7e78d9abd85719c75066 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Pow.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index a40e5bdc92be8599d661155b6b3da04cd7457442..988a50669509df4d583bf6c6d95ca4f810f4be91 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/RNN.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index f85d52cb04264250bd0c8308abb06aca463f7893..8ced92a0bf5fb1ce165e4adde2d2400b793e17c6 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Range.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index c33ed0a804d4ca70968ae6afad32d691afc714b4..40797bf29aa426d335fb1c6b0733c6746d565487 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Rank.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 0811f1c371aefa9f377bead7026e33ef641bcfc2..8da1940fa082f5fcd43aa9326b8e16fc9c9dfe81 100644 (file)
  */
 
 #include "ir/operation/Reduce.h"
+#include "ir/OperationVisitor.h"
 
-#include <cassert>
 #include <unordered_map>
 
-#include "ir/OperationVisitor.h"
-
 namespace onert
 {
 namespace ir
index 54c12574afd7f3a84715189cfc7ff49940036b35..0ed4affa15213bc9e2011a133dff060461205d70 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Reshape.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 7c9f5e104d06d0e2a3ef7114bbf67541a84171d7..7d256f44760c691a03bb1a829df183f21530cd8e 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/ResizeBilinear.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 9792b292decc9bcb9176a6f1247a4a8a1ecadc12..58be87b9553416390f4314bf77c05650cf38c69a 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/ResizeNearestNeighbor.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 471457739276b6e14fb9be84bffa545a9652027b..6c3746426f56f09a61a0d1690df6781e891eabe1 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Reverse.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 1b2cd6241758c6b68f4cb7f540efd9fd03a1392a..f9092448824a8f58ec07ebb176dff079e6602836 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Shape.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 91850fa33978117ee99d3df8e35f08e80bf06e61..c06c85309969d443ee08f732b3f65ca0170a0ebd 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Softmax.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 97c6308887d259d62dbb93cd986626fff8cfe419..94acccb0c1e83a100c27ad4bc3c3db61ef0fd98d 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/SpaceToBatchND.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index e1fd27a55d4fe5ecbd878afaab2204b5dbe0bd8b..08e7e51905e4242bcc9bd713ad0d196ae7ace744 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/SpaceToDepth.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 96822822bfcf39a5573ec3abd33de6425c30e854..3e371188da5ef064023e0081cd730817baf3453f 100644 (file)
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "ir/operation/Split.h"
-#include <cassert>
 #include "ir/OperationVisitor.h"
+
 namespace onert
 {
 namespace ir
index 38918cd811d2b1288d3ba839d4b16f147f16bc41..be13f167ed843e3a323ae1437b10c8702cfb9e74 100644 (file)
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "ir/operation/SplitV.h"
-#include <cassert>
 #include "ir/OperationVisitor.h"
+
 namespace onert
 {
 namespace ir
index 705b60abcf804ac18a92ac85c604f6292350eb61..db93903c7e4f921fb88ab895990940e5c8bbefb6 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/SquaredDifference.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 18f1cf5a6a5d1e589c82dc34f5254fd17c20944a..94be0be862b19b83cbd1f1efffc042ad14e36125 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/StatelessRandomUniform.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index e8278b4560db6efb9822c8c0dc128c24559bf263..a38282c933e0ae6850f189cab8ae5bb9c6f8b26e 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/StridedSlice.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 0ec78557961d4308abdca98d2b3b1c640ccb1e92..51c1ff1dcbbfa25dc8ac5dc46e669d94a013d7c6 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Tile.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index a1f39202d4925583c366e9c898ab7c59b81c2f28..e1723d180f4ab80d4b7803ee399d0e0193fe616e 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/TopKV2.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index f2ee52f0e7ec302b8fe3d85c735979d2cb9f950b..dbc5ef2aa9d0dbb1932600ab618ab16bca9f41ae 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/Transpose.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 1f405dc6bd76100c0f33bcfac042889df75f0429..944cc365d3c06d7ccbc61056a4bfde6b662ec9fd 100644 (file)
@@ -15,9 +15,6 @@
  */
 
 #include "ir/operation/TransposeConv.h"
-
-#include <cassert>
-
 #include "ir/OperationVisitor.h"
 
 namespace onert
index 90d3c0c07258008152a307f7133849693c627855..185eddce3b4f1f106b61a78704e2ca300f8a261b 100644 (file)
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "ir/operation/Unpack.h"
 #include "ir/OperationVisitor.h"
 
index 8a6f5c01e18e3b1e8530c636f3db830f083e20a2..f35996b070f66fe1a475518ecf5fe26867464471 100644 (file)
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "ir/operation/While.h"
 #include "ir/OperationVisitor.h"
 
index 6ba7ee922c262ce32d16002e369db476651852ba..5649f286d63d12c7bf7cdfdb84ce03eb6e0178ec 100644 (file)
@@ -96,6 +96,7 @@ protected:
   ir::Activation convertActivation(ActivationFunctionType type);
   ir::DataType tensorTypeToDataType(TensorType type);
   ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx);
+  flexbuffers::Map getCustomOpAttrMap(const Operator *op);
 
   // Create operands form tflite::Tensor
   ir::OperandIndex loadOperand(const Tensor *tensor, ir::Graph &subg);
@@ -110,6 +111,16 @@ protected:
   void loadStridesAndPaddings(Param &param, const OptionsType *options);
   // Load Pool2D param
   template <typename Param> void loadPool2DOptions(Param &param, const Pool2DOptions *options);
+  // Get BuiltinOperator
+  BuiltinOperator getBuiltinOperator(const Operator *op)
+  {
+    auto const builtin_opcode = _model->operator_codes()->Get(op->opcode_index());
+    auto builtin_op = builtin_opcode->builtin_code();
+    if (builtin_op < BuiltinOperator::BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES)
+      builtin_op = static_cast<BuiltinOperator>(builtin_opcode->deprecated_builtin_code());
+
+    return builtin_op;
+  }
 
 private:
   virtual std::unique_ptr<ir::Graph> loadSubgraph(const SubGraph *subg) = 0;
@@ -291,6 +302,15 @@ ir::OperandIndex BaseLoader<LoaderDomain>::BaseLoader::tensorIdxToOperandIdx(int
   return isOptionalInputTensor(tensorIdx) ? ir::OperandIndex() : _tensor_to_operand[tensorIdx];
 }
 
+template <typename LoaderDomain>
+flexbuffers::Map BaseLoader<LoaderDomain>::BaseLoader::getCustomOpAttrMap(const Operator *op)
+{
+  size_t custom_op_data_size = op->custom_options()->size();
+  auto custom_op_data = op->custom_options()->Data();
+  auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
+  return data_root.AsMap();
+}
+
 /* Copy is copied from tensorflow lite */
 template <typename T> bool Copy(const T *data_ptr, std::vector<uint16_t> &arr)
 {
@@ -545,7 +565,7 @@ void BaseLoader<LoaderDomain>::loadOperationIO(const Operator *op, ir::OperandIn
   {
     // Optional tensors are not supported yet except for FULLY_CONNECTED and BCQ_FULLY_CONNECTED
     auto check_optional_input = [&]() {
-      auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+      auto builtin_code = getBuiltinOperator(op);
       if (isOptionalInputTensor(idx) && !allowOptionalInputTensor(builtin_code))
         throw std::runtime_error(
           std::string("loader doesn't support optional input tensor yet for ")
@@ -748,10 +768,7 @@ void BaseLoader<LoaderDomain>::loadAddV2(const Operator *op, ir::Graph &subg)
   }
   else
   {
-    size_t custom_op_data_size = op->custom_options()->size();
-    auto custom_op_data = op->custom_options()->Data();
-    auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
-    auto attr_map = data_root.AsMap();
+    const auto attr_map = getCustomOpAttrMap(op);
     const auto fused_activation_func = static_cast<typename LoaderDomain::ActivationFunctionType>(
       attr_map["fused_activation_function"].AsInt8());
     param.activation = convertActivation(fused_activation_func);
@@ -876,10 +893,7 @@ void BaseLoader<LoaderDomain>::loadReduceAll(const Operator *op, ir::Graph &subg
   }
   else
   {
-    size_t custom_op_data_size = op->custom_options()->size();
-    auto custom_op_data = op->custom_options()->Data();
-    auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
-    auto attr_map = data_root.AsMap();
+    const auto attr_map = getCustomOpAttrMap(op);
     param.keep_dims = attr_map["keep_dims"].AsBool();
   }
 
@@ -931,8 +945,7 @@ void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg)
 template <typename LoaderDomain>
 void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg)
 {
-  const flexbuffers::Map &m =
-    flexbuffers::GetRoot(op->custom_options()->data(), op->custom_options()->size()).AsMap();
+  const auto &m = getCustomOpAttrMap(op);
 
   ir::operation::DetectionPostProcess::Param param;
 
@@ -972,14 +985,17 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su
 {
   ir::operation::BatchMatMul::Param param;
 
-  const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+  const auto builtin_op = getBuiltinOperator(op);
 
   switch (builtin_op)
   {
     case BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
-      param.adj_x = op->builtin_options_as_BatchMatMulOptions()->adjoint_lhs();
-      param.adj_y = op->builtin_options_as_BatchMatMulOptions()->adjoint_rhs();
-      break;
+      // Handled on each loader: different option name
+      //  Circle: adjoint_lhs, adjoint_rhs
+      //  TFLite: adj_x, adj_y
+      throw std::runtime_error(
+        std::string("Cannot handle here: ").append(EnumNameBuiltinOperator(builtin_op)) + " as " +
+        EnumNameBuiltinOperator(BuiltinOperator::BuiltinOperator_BATCH_MATMUL));
     case BuiltinOperator::BuiltinOperator_CUSTOM:
       if (op->custom_options() == nullptr)
       {
@@ -988,10 +1004,7 @@ void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &su
       }
       else
       {
-        size_t custom_op_data_size = op->custom_options()->size();
-        auto custom_op_data = op->custom_options()->Data();
-        auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
-        auto attr_map = data_root.AsMap();
+        const auto attr_map = getCustomOpAttrMap(op);
         param.adj_x = attr_map["adj_x"].AsBool();
         param.adj_y = attr_map["adj_y"].AsBool();
       }
@@ -1184,7 +1197,7 @@ template <typename LoaderDomain>
 void BaseLoader<LoaderDomain>::loadComparison(const Operator *op, ir::Graph &subg)
 {
   ir::operation::Comparison::Param param;
-  const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+  const auto builtin_op = getBuiltinOperator(op);
 
   switch (builtin_op)
   {
@@ -1224,10 +1237,7 @@ void BaseLoader<LoaderDomain>::loadEinsum(const Operator *op, ir::Graph &subg)
   }
   else
   {
-    size_t custom_op_data_size = op->custom_options()->size();
-    auto custom_op_data = op->custom_options()->Data();
-    auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
-    auto attr_map = data_root.AsMap();
+    const auto attr_map = getCustomOpAttrMap(op);
     param.equation = attr_map["equation"].ToString();
   }
 
@@ -1247,10 +1257,7 @@ void BaseLoader<LoaderDomain>::loadFusedBatchNorm(const Operator *op, ir::Graph
   }
   else
   {
-    size_t custom_op_data_size = op->custom_options()->size();
-    auto custom_op_data = op->custom_options()->Data();
-    auto data_root = flexbuffers::GetRoot(custom_op_data, custom_op_data_size);
-    auto attr_map = data_root.AsMap();
+    const auto attr_map = getCustomOpAttrMap(op);
     param.is_training = attr_map["is_training"].AsBool();
     param.epsilon = attr_map["epsilon"].AsFloat();
     param.data_format = attr_map["data_format"].ToString();
@@ -1363,7 +1370,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op
   // loader doesn't support optional output tensor yet
   if (op->outputs()->size() != 1)
   {
-    auto builtin_code = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+    auto builtin_code = getBuiltinOperator(op);
     throw std::runtime_error(std::string("loader doesn't support optional output tensor yet for ")
                                .append(EnumNameBuiltinOperator(builtin_code)));
   }
@@ -1381,7 +1388,7 @@ void BaseLoader<LoaderDomain>::loadUnidirectionalSequenceLSTM(const Operator *op
 template <typename LoaderDomain>
 void BaseLoader<LoaderDomain>::loadOperation(const Operator *op, ir::Graph &subg)
 {
-  const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+  auto const builtin_op = getBuiltinOperator(op);
 
   switch (builtin_op)
   {
index 4fb0e71d665c965ce7bccac2aeda03a1e4f3b074..aae831d61a983226bf387cdc8a0493a2157a5e27 100644 (file)
@@ -72,6 +72,12 @@ struct LoaderDomain
 class CircleLoader final : public base_loader::BaseLoader<LoaderDomain>
 {
 protected:
+  // Different option name
+  //  Circle: adjoint_lhs, adjoint_rhs
+  //  TFLite: adj_x, adj_y
+  void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+
+  // Only circle operations
   void loadInstanceNorm(const Operator *op, ir::Graph &subg);
   void loadBCQFullyConnected(const Operator *op, ir::Graph &subg);
   void loadBCQGather(const Operator *op, ir::Graph &subg);
@@ -129,10 +135,13 @@ private:
 
   void loadOperation(const circle::Operator *op, ir::Graph &subg)
   {
-    const auto builtin_op = _model->operator_codes()->Get(op->opcode_index())->builtin_code();
+    auto const builtin_op = getBuiltinOperator(op);
 
     switch (builtin_op)
     {
+      case circle::BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
+        loadBatchMatMul(op, subg);
+        return;
       case circle::BuiltinOperator::BuiltinOperator_INSTANCE_NORM:
         loadInstanceNorm(op, subg);
         return;
@@ -149,6 +158,23 @@ private:
   }
 };
 
+void CircleLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::BatchMatMul::Param param;
+  const auto *options = op->builtin_options_as_BatchMatMulOptions();
+
+  param.adj_x = options->adjoint_lhs();
+  param.adj_y = options->adjoint_rhs();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
 void CircleLoader::loadInstanceNorm(const Operator *op, ir::Graph &subg)
 {
   ir::OperandIndexSequence inputs;
index eb1775297ad72f868b80f897caa28e62585b7fca..e3c92eae0303a8b9631756f609e21e7b972780d2 100644 (file)
@@ -51,6 +51,9 @@ struct TensorBuilder;
 struct Conv2DOptions;
 struct Conv2DOptionsBuilder;
 
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
+
 struct Pool2DOptions;
 struct Pool2DOptionsBuilder;
 
@@ -327,6 +330,9 @@ struct MatrixSetDiagOptionsBuilder;
 struct IfOptions;
 struct IfOptionsBuilder;
 
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
+
 struct WhileOptions;
 struct WhileOptionsBuilder;
 
@@ -351,6 +357,39 @@ struct SegmentSumOptionsBuilder;
 struct BatchMatMulOptions;
 struct BatchMatMulOptionsBuilder;
 
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
+
 struct BCQGatherOptions;
 struct BCQGatherOptionsBuilder;
 
@@ -375,10 +414,16 @@ struct BufferBuilder;
 struct Metadata;
 struct MetadataBuilder;
 
+struct TensorMap;
+struct TensorMapBuilder;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
+
 struct Model;
 struct ModelBuilder;
 
-enum TensorType
+enum TensorType : int8_t
 {
   TensorType_FLOAT32 = 0,
   TensorType_FLOAT16 = 1,
@@ -391,36 +436,43 @@ enum TensorType
   TensorType_COMPLEX64 = 8,
   TensorType_INT8 = 9,
   TensorType_FLOAT64 = 10,
+  TensorType_COMPLEX128 = 11,
+  TensorType_UINT64 = 12,
+  TensorType_RESOURCE = 13,
+  TensorType_VARIANT = 14,
+  TensorType_UINT32 = 15,
   TensorType_MIN = TensorType_FLOAT32,
-  TensorType_MAX = TensorType_FLOAT64
+  TensorType_MAX = TensorType_UINT32
 };
 
-inline const TensorType (&EnumValuesTensorType())[11]
+inline const TensorType (&EnumValuesTensorType())[16]
 {
-  static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
-                                      TensorType_UINT8,   TensorType_INT64,   TensorType_STRING,
-                                      TensorType_BOOL,    TensorType_INT16,   TensorType_COMPLEX64,
-                                      TensorType_INT8,    TensorType_FLOAT64};
+  static const TensorType values[] = {
+    TensorType_FLOAT32,   TensorType_FLOAT16,  TensorType_INT32,   TensorType_UINT8,
+    TensorType_INT64,     TensorType_STRING,   TensorType_BOOL,    TensorType_INT16,
+    TensorType_COMPLEX64, TensorType_INT8,     TensorType_FLOAT64, TensorType_COMPLEX128,
+    TensorType_UINT64,    TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32};
   return values;
 }
 
 inline const char *const *EnumNamesTensorType()
 {
-  static const char *const names[12] = {"FLOAT32",   "FLOAT16", "INT32",   "UINT8",
-                                        "INT64",     "STRING",  "BOOL",    "INT16",
-                                        "COMPLEX64", "INT8",    "FLOAT64", nullptr};
+  static const char *const names[17] = {"FLOAT32", "FLOAT16",    "INT32",  "UINT8",     "INT64",
+                                        "STRING",  "BOOL",       "INT16",  "COMPLEX64", "INT8",
+                                        "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE",  "VARIANT",
+                                        "UINT32",  nullptr};
   return names;
 }
 
 inline const char *EnumNameTensorType(TensorType e)
 {
-  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_FLOAT64))
+  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32))
     return "";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesTensorType()[index];
 }
 
-enum QuantizationDetails
+enum QuantizationDetails : uint8_t
 {
   QuantizationDetails_NONE = 0,
   QuantizationDetails_CustomQuantization = 1,
@@ -465,7 +517,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
                                      const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
                                      const flatbuffers::Vector<uint8_t> *types);
 
-enum DimensionType
+enum DimensionType : int8_t
 {
   DimensionType_DENSE = 0,
   DimensionType_SPARSE_CSR = 1,
@@ -493,7 +545,7 @@ inline const char *EnumNameDimensionType(DimensionType e)
   return EnumNamesDimensionType()[index];
 }
 
-enum SparseIndexVector
+enum SparseIndexVector : uint8_t
 {
   SparseIndexVector_NONE = 0,
   SparseIndexVector_Int32Vector = 1,
@@ -552,8 +604,11 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
                                    const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
                                    const flatbuffers::Vector<uint8_t> *types);
 
-enum BuiltinOperator
+enum BuiltinOperator : int32_t
 {
+  BuiltinOperator_BCQ_GATHER = -4,
+  BuiltinOperator_BCQ_FULLY_CONNECTED = -3,
+  BuiltinOperator_INSTANCE_NORM = -2,
   BuiltinOperator_ADD = 0,
   BuiltinOperator_AVERAGE_POOL_2D = 1,
   BuiltinOperator_CONCATENATION = 2,
@@ -681,16 +736,36 @@ enum BuiltinOperator
   BuiltinOperator_DENSIFY = 124,
   BuiltinOperator_SEGMENT_SUM = 125,
   BuiltinOperator_BATCH_MATMUL = 126,
-  BuiltinOperator_BCQ_GATHER = 252,
-  BuiltinOperator_BCQ_FULLY_CONNECTED = 253,
-  BuiltinOperator_INSTANCE_NORM = 254,
-  BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_INSTANCE_NORM
-};
-
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130]
-{
-  static const BuiltinOperator values[] = {BuiltinOperator_ADD,
+  BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+  BuiltinOperator_CUMSUM = 128,
+  BuiltinOperator_CALL_ONCE = 129,
+  BuiltinOperator_BROADCAST_TO = 130,
+  BuiltinOperator_RFFT2D = 131,
+  BuiltinOperator_CONV_3D = 132,
+  BuiltinOperator_IMAG = 133,
+  BuiltinOperator_REAL = 134,
+  BuiltinOperator_COMPLEX_ABS = 135,
+  BuiltinOperator_HASHTABLE = 136,
+  BuiltinOperator_HASHTABLE_FIND = 137,
+  BuiltinOperator_HASHTABLE_IMPORT = 138,
+  BuiltinOperator_HASHTABLE_SIZE = 139,
+  BuiltinOperator_REDUCE_ALL = 140,
+  BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+  BuiltinOperator_VAR_HANDLE = 142,
+  BuiltinOperator_READ_VARIABLE = 143,
+  BuiltinOperator_ASSIGN_VARIABLE = 144,
+  BuiltinOperator_BROADCAST_ARGS = 145,
+  BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
+  BuiltinOperator_MIN = BuiltinOperator_BCQ_GATHER,
+  BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL
+};
+
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[150]
+{
+  static const BuiltinOperator values[] = {BuiltinOperator_BCQ_GATHER,
+                                           BuiltinOperator_BCQ_FULLY_CONNECTED,
+                                           BuiltinOperator_INSTANCE_NORM,
+                                           BuiltinOperator_ADD,
                                            BuiltinOperator_AVERAGE_POOL_2D,
                                            BuiltinOperator_CONCATENATION,
                                            BuiltinOperator_CONV_2D,
@@ -817,15 +892,36 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[130]
                                            BuiltinOperator_DENSIFY,
                                            BuiltinOperator_SEGMENT_SUM,
                                            BuiltinOperator_BATCH_MATMUL,
-                                           BuiltinOperator_BCQ_GATHER,
-                                           BuiltinOperator_BCQ_FULLY_CONNECTED,
-                                           BuiltinOperator_INSTANCE_NORM};
+                                           BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+                                           BuiltinOperator_CUMSUM,
+                                           BuiltinOperator_CALL_ONCE,
+                                           BuiltinOperator_BROADCAST_TO,
+                                           BuiltinOperator_RFFT2D,
+                                           BuiltinOperator_CONV_3D,
+                                           BuiltinOperator_IMAG,
+                                           BuiltinOperator_REAL,
+                                           BuiltinOperator_COMPLEX_ABS,
+                                           BuiltinOperator_HASHTABLE,
+                                           BuiltinOperator_HASHTABLE_FIND,
+                                           BuiltinOperator_HASHTABLE_IMPORT,
+                                           BuiltinOperator_HASHTABLE_SIZE,
+                                           BuiltinOperator_REDUCE_ALL,
+                                           BuiltinOperator_CONV_3D_TRANSPOSE,
+                                           BuiltinOperator_VAR_HANDLE,
+                                           BuiltinOperator_READ_VARIABLE,
+                                           BuiltinOperator_ASSIGN_VARIABLE,
+                                           BuiltinOperator_BROADCAST_ARGS,
+                                           BuiltinOperator_RANDOM_STANDARD_NORMAL};
   return values;
 }
 
 inline const char *const *EnumNamesBuiltinOperator()
 {
-  static const char *const names[256] = {"ADD",
+  static const char *const names[152] = {"BCQ_GATHER",
+                                         "BCQ_FULLY_CONNECTED",
+                                         "INSTANCE_NORM",
+                                         "",
+                                         "ADD",
                                          "AVERAGE_POOL_2D",
                                          "CONCATENATION",
                                          "CONV_2D",
@@ -952,147 +1048,40 @@ inline const char *const *EnumNamesBuiltinOperator()
                                          "DENSIFY",
                                          "SEGMENT_SUM",
                                          "BATCH_MATMUL",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "BCQ_GATHER",
-                                         "BCQ_FULLY_CONNECTED",
-                                         "INSTANCE_NORM",
+                                         "PLACEHOLDER_FOR_GREATER_OP_CODES",
+                                         "CUMSUM",
+                                         "CALL_ONCE",
+                                         "BROADCAST_TO",
+                                         "RFFT2D",
+                                         "CONV_3D",
+                                         "IMAG",
+                                         "REAL",
+                                         "COMPLEX_ABS",
+                                         "HASHTABLE",
+                                         "HASHTABLE_FIND",
+                                         "HASHTABLE_IMPORT",
+                                         "HASHTABLE_SIZE",
+                                         "REDUCE_ALL",
+                                         "CONV_3D_TRANSPOSE",
+                                         "VAR_HANDLE",
+                                         "READ_VARIABLE",
+                                         "ASSIGN_VARIABLE",
+                                         "BROADCAST_ARGS",
+                                         "RANDOM_STANDARD_NORMAL",
                                          nullptr};
   return names;
 }
 
 inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
 {
-  if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_INSTANCE_NORM))
+  if (flatbuffers::IsOutRange(e, BuiltinOperator_BCQ_GATHER,
+                              BuiltinOperator_RANDOM_STANDARD_NORMAL))
     return "";
-  const size_t index = static_cast<size_t>(e);
+  const size_t index = static_cast<size_t>(e) - static_cast<size_t>(BuiltinOperator_BCQ_GATHER);
   return EnumNamesBuiltinOperator()[index];
 }
 
-enum BuiltinOptions
+enum BuiltinOptions : uint8_t
 {
   BuiltinOptions_NONE = 0,
   BuiltinOptions_Conv2DOptions = 1,
@@ -1196,6 +1185,19 @@ enum BuiltinOptions
   BuiltinOptions_DensifyOptions = 99,
   BuiltinOptions_SegmentSumOptions = 100,
   BuiltinOptions_BatchMatMulOptions = 101,
+  BuiltinOptions_CumsumOptions = 102,
+  BuiltinOptions_CallOnceOptions = 103,
+  BuiltinOptions_BroadcastToOptions = 104,
+  BuiltinOptions_Rfft2dOptions = 105,
+  BuiltinOptions_Conv3DOptions = 106,
+  BuiltinOptions_HashtableOptions = 107,
+  BuiltinOptions_HashtableFindOptions = 108,
+  BuiltinOptions_HashtableImportOptions = 109,
+  BuiltinOptions_HashtableSizeOptions = 110,
+  BuiltinOptions_VarHandleOptions = 111,
+  BuiltinOptions_ReadVariableOptions = 112,
+  BuiltinOptions_AssignVariableOptions = 113,
+  BuiltinOptions_RandomOptions = 114,
   BuiltinOptions_BCQGatherOptions = 252,
   BuiltinOptions_BCQFullyConnectedOptions = 253,
   BuiltinOptions_InstanceNormOptions = 254,
@@ -1203,7 +1205,7 @@ enum BuiltinOptions
   BuiltinOptions_MAX = BuiltinOptions_InstanceNormOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[118]
 {
   static const BuiltinOptions values[] = {BuiltinOptions_NONE,
                                           BuiltinOptions_Conv2DOptions,
@@ -1307,6 +1309,19 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[105]
                                           BuiltinOptions_DensifyOptions,
                                           BuiltinOptions_SegmentSumOptions,
                                           BuiltinOptions_BatchMatMulOptions,
+                                          BuiltinOptions_CumsumOptions,
+                                          BuiltinOptions_CallOnceOptions,
+                                          BuiltinOptions_BroadcastToOptions,
+                                          BuiltinOptions_Rfft2dOptions,
+                                          BuiltinOptions_Conv3DOptions,
+                                          BuiltinOptions_HashtableOptions,
+                                          BuiltinOptions_HashtableFindOptions,
+                                          BuiltinOptions_HashtableImportOptions,
+                                          BuiltinOptions_HashtableSizeOptions,
+                                          BuiltinOptions_VarHandleOptions,
+                                          BuiltinOptions_ReadVariableOptions,
+                                          BuiltinOptions_AssignVariableOptions,
+                                          BuiltinOptions_RandomOptions,
                                           BuiltinOptions_BCQGatherOptions,
                                           BuiltinOptions_BCQFullyConnectedOptions,
                                           BuiltinOptions_InstanceNormOptions};
@@ -1417,19 +1432,19 @@ inline const char *const *EnumNamesBuiltinOptions()
                                          "DensifyOptions",
                                          "SegmentSumOptions",
                                          "BatchMatMulOptions",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
-                                         "",
+                                         "CumsumOptions",
+                                         "CallOnceOptions",
+                                         "BroadcastToOptions",
+                                         "Rfft2dOptions",
+                                         "Conv3DOptions",
+                                         "HashtableOptions",
+                                         "HashtableFindOptions",
+                                         "HashtableImportOptions",
+                                         "HashtableSizeOptions",
+                                         "VarHandleOptions",
+                                         "ReadVariableOptions",
+                                         "AssignVariableOptions",
+                                         "RandomOptions",
                                          "",
                                          "",
                                          "",
@@ -2092,6 +2107,71 @@ template <> struct BuiltinOptionsTraits<circle::BatchMatMulOptions>
   static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
 };
 
+template <> struct BuiltinOptionsTraits<circle::CumsumOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::CallOnceOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::BroadcastToOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Rfft2dOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::Conv3DOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableFindOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableImportOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::HashtableSizeOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::VarHandleOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::ReadVariableOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::AssignVariableOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<circle::RandomOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
 template <> struct BuiltinOptionsTraits<circle::BCQGatherOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_BCQGatherOptions;
@@ -2112,7 +2192,7 @@ bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
                                 const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
                                 const flatbuffers::Vector<uint8_t> *types);
 
-enum Padding
+enum Padding : int8_t
 {
   Padding_SAME = 0,
   Padding_VALID = 1,
@@ -2140,7 +2220,7 @@ inline const char *EnumNamePadding(Padding e)
   return EnumNamesPadding()[index];
 }
 
-enum ActivationFunctionType
+enum ActivationFunctionType : int8_t
 {
   ActivationFunctionType_NONE = 0,
   ActivationFunctionType_RELU = 1,
@@ -2175,7 +2255,7 @@ inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
   return EnumNamesActivationFunctionType()[index];
 }
 
-enum LSHProjectionType
+enum LSHProjectionType : int8_t
 {
   LSHProjectionType_UNKNOWN = 0,
   LSHProjectionType_SPARSE = 1,
@@ -2205,7 +2285,7 @@ inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
   return EnumNamesLSHProjectionType()[index];
 }
 
-enum FullyConnectedOptionsWeightsFormat
+enum FullyConnectedOptionsWeightsFormat : int8_t
 {
   FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
   FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
@@ -2237,7 +2317,7 @@ inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOpti
   }
 }
 
-enum LSTMKernelType
+enum LSTMKernelType : int8_t
 {
   LSTMKernelType_FULL = 0,
   LSTMKernelType_BASIC = 1,
@@ -2265,7 +2345,7 @@ inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
   return EnumNamesLSTMKernelType()[index];
 }
 
-enum CombinerType
+enum CombinerType : int8_t
 {
   CombinerType_SUM = 0,
   CombinerType_MEAN = 1,
@@ -2294,7 +2374,7 @@ inline const char *EnumNameCombinerType(CombinerType e)
   return EnumNamesCombinerType()[index];
 }
 
-enum MirrorPadMode
+enum MirrorPadMode : int8_t
 {
   MirrorPadMode_REFLECT = 0,
   MirrorPadMode_SYMMETRIC = 1,
@@ -2322,7 +2402,7 @@ inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
   return EnumNamesMirrorPadMode()[index];
 }
 
-enum CustomOptionsFormat
+enum CustomOptionsFormat : int8_t
 {
   CustomOptionsFormat_FLEXBUFFERS = 0,
   CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
@@ -2349,7 +2429,7 @@ inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
   return EnumNamesCustomOptionsFormat()[index];
 }
 
-enum DataFormat
+enum DataFormat : int8_t
 {
   DataFormat_CHANNELS_LAST = 0,
   DataFormat_CHANNELS_FIRST = 1,
@@ -2408,7 +2488,6 @@ struct CustomQuantizationBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
   flatbuffers::Offset<CustomQuantization> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2539,7 +2618,6 @@ struct QuantizationParametersBuilder
   {
     start_ = fbb_.StartTable();
   }
-  QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
   flatbuffers::Offset<QuantizationParameters> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2613,7 +2691,6 @@ struct Int32VectorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Int32VectorBuilder &operator=(const Int32VectorBuilder &);
   flatbuffers::Offset<Int32Vector> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2670,7 +2747,6 @@ struct Uint16VectorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
   flatbuffers::Offset<Uint16Vector> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2731,7 +2807,6 @@ struct Uint8VectorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
   flatbuffers::Offset<Uint8Vector> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2912,7 +2987,6 @@ struct DimensionMetadataBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
   flatbuffers::Offset<DimensionMetadata> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2994,7 +3068,6 @@ struct SparsityParametersBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
   flatbuffers::Offset<SparsityParameters> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3121,7 +3194,6 @@ struct TensorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TensorBuilder &operator=(const TensorBuilder &);
   flatbuffers::Offset<Tensor> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3235,7 +3307,6 @@ struct Conv2DOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
   flatbuffers::Offset<Conv2DOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3260,103 +3331,212 @@ inline flatbuffers::Offset<Conv2DOptions> CreateConv2DOptions(
   return builder_.Finish();
 }
 
-struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  typedef Pool2DOptionsBuilder Builder;
+  typedef Conv3DOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_PADDING = 4,
-    VT_STRIDE_W = 6,
-    VT_STRIDE_H = 8,
-    VT_FILTER_WIDTH = 10,
-    VT_FILTER_HEIGHT = 12,
-    VT_FUSED_ACTIVATION_FUNCTION = 14
+    VT_STRIDE_D = 6,
+    VT_STRIDE_W = 8,
+    VT_STRIDE_H = 10,
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_D_FACTOR = 14,
+    VT_DILATION_W_FACTOR = 16,
+    VT_DILATION_H_FACTOR = 18
   };
   circle::Padding padding() const
   {
     return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
   }
+  int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
   int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
   int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
-  int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
-  int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
   circle::ActivationFunctionType fused_activation_function() const
   {
     return static_cast<circle::ActivationFunctionType>(
       GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+  int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+  int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
            VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
            VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
-           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
-           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
   }
 };
 
-struct Pool2DOptionsBuilder
+struct Conv3DOptionsBuilder
 {
-  typedef Pool2DOptions Table;
+  typedef Conv3DOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_padding(circle::Padding padding)
   {
-    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+    fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_d(int32_t stride_d)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
   }
   void add_stride_w(int32_t stride_w)
   {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
   }
   void add_stride_h(int32_t stride_h)
   {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
   }
-  void add_filter_width(int32_t filter_width)
+  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
   {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
+    fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
   }
-  void add_filter_height(int32_t filter_height)
+  void add_dilation_d_factor(int32_t dilation_d_factor)
   {
-    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
   }
-  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+  void add_dilation_w_factor(int32_t dilation_w_factor)
   {
-    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
-                            static_cast<int8_t>(fused_activation_function), 0);
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
   }
-  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  void add_dilation_h_factor(int32_t dilation_h_factor)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
-  flatbuffers::Offset<Pool2DOptions> Finish()
+  flatbuffers::Offset<Conv3DOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Pool2DOptions>(end);
+    auto o = flatbuffers::Offset<Conv3DOptions>(end);
     return o;
   }
 };
 
-inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
   flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
-  int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
-  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+  int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+  int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
 {
-  Pool2DOptionsBuilder builder_(_fbb);
-  builder_.add_filter_height(filter_height);
-  builder_.add_filter_width(filter_width);
+  Conv3DOptionsBuilder builder_(_fbb);
+  builder_.add_dilation_h_factor(dilation_h_factor);
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_dilation_d_factor(dilation_d_factor);
   builder_.add_stride_h(stride_h);
   builder_.add_stride_w(stride_w);
+  builder_.add_stride_d(stride_d);
   builder_.add_fused_activation_function(fused_activation_function);
   builder_.add_padding(padding);
   return builder_.Finish();
 }
 
-struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  typedef DepthwiseConv2DOptionsBuilder Builder;
+  typedef Pool2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_PADDING = 4,
+    VT_STRIDE_W = 6,
+    VT_STRIDE_H = 8,
+    VT_FILTER_WIDTH = 10,
+    VT_FILTER_HEIGHT = 12,
+    VT_FUSED_ACTIVATION_FUNCTION = 14
+  };
+  circle::Padding padding() const
+  {
+    return static_cast<circle::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
+  int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
+  circle::ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<circle::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_WIDTH) &&
+           VerifyField<int32_t>(verifier, VT_FILTER_HEIGHT) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct Pool2DOptionsBuilder
+{
+  typedef Pool2DOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(circle::Padding padding)
+  {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_w(int32_t stride_w)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_filter_width(int32_t filter_width)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_WIDTH, filter_width, 0);
+  }
+  void add_filter_height(int32_t filter_height)
+  {
+    fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
+  }
+  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Pool2DOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Pool2DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+  flatbuffers::FlatBufferBuilder &_fbb, circle::Padding padding = circle::Padding_SAME,
+  int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+  Pool2DOptionsBuilder builder_(_fbb);
+  builder_.add_filter_height(filter_height);
+  builder_.add_filter_width(filter_width);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
+struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef DepthwiseConv2DOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_PADDING = 4,
@@ -3431,7 +3611,6 @@ struct DepthwiseConv2DOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
   flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3509,7 +3688,6 @@ struct ConcatEmbeddingsOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
   flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3574,7 +3752,6 @@ struct LSHProjectionOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
   flatbuffers::Offset<LSHProjectionOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3639,7 +3816,6 @@ struct SVDFOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
   flatbuffers::Offset<SVDFOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3704,7 +3880,6 @@ struct RNNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
   flatbuffers::Offset<RNNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3775,7 +3950,6 @@ struct SequenceRNNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
   flatbuffers::Offset<SequenceRNNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3855,7 +4029,6 @@ struct BidirectionalSequenceRNNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
   flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3941,7 +4114,6 @@ struct FullyConnectedOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
   flatbuffers::Offset<FullyConnectedOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3990,7 +4162,6 @@ struct SoftmaxOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
   flatbuffers::Offset<SoftmaxOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4043,7 +4214,6 @@ struct ConcatenationOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
   flatbuffers::Offset<ConcatenationOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4067,17 +4237,20 @@ struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   typedef AddOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_POT_SCALE_INT16 = 6
   };
   circle::ActivationFunctionType fused_activation_function() const
   {
     return static_cast<circle::ActivationFunctionType>(
       GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
   }
 };
 
@@ -4091,11 +4264,15 @@ struct AddOptionsBuilder
     fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_pot_scale_int16(bool pot_scale_int16)
+  {
+    fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+                             1);
+  }
   explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  AddOptionsBuilder &operator=(const AddOptionsBuilder &);
   flatbuffers::Offset<AddOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4106,9 +4283,11 @@ struct AddOptionsBuilder
 
 inline flatbuffers::Offset<AddOptions> CreateAddOptions(
   flatbuffers::FlatBufferBuilder &_fbb,
-  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+  bool pot_scale_int16 = true)
 {
   AddOptionsBuilder builder_(_fbb);
+  builder_.add_pot_scale_int16(pot_scale_int16);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
@@ -4146,7 +4325,6 @@ struct MulOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MulOptionsBuilder &operator=(const MulOptionsBuilder &);
   flatbuffers::Offset<MulOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4197,7 +4375,6 @@ struct L2NormOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
   flatbuffers::Offset<L2NormOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4263,8 +4440,6 @@ struct LocalResponseNormalizationOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LocalResponseNormalizationOptionsBuilder &
-  operator=(const LocalResponseNormalizationOptionsBuilder &);
   flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4353,7 +4528,6 @@ struct LSTMOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
   flatbuffers::Offset<LSTMOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4445,8 +4619,6 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  UnidirectionalSequenceLSTMOptionsBuilder &
-  operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
   flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4546,8 +4718,6 @@ struct BidirectionalSequenceLSTMOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BidirectionalSequenceLSTMOptionsBuilder &
-  operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
   flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4608,7 +4778,6 @@ struct ResizeBilinearOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
   flatbuffers::Offset<ResizeBilinearOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4632,13 +4801,15 @@ struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer
   typedef ResizeNearestNeighborOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_ALIGN_CORNERS = 4
+    VT_ALIGN_CORNERS = 4,
+    VT_HALF_PIXEL_CENTERS = 6
   };
   bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+  bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
-           verifier.EndTable();
+           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
   }
 };
 
@@ -4652,11 +4823,15 @@ struct ResizeNearestNeighborOptionsBuilder
     fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
                              static_cast<uint8_t>(align_corners), 0);
   }
+  void add_half_pixel_centers(bool half_pixel_centers)
+  {
+    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+                             static_cast<uint8_t>(half_pixel_centers), 0);
+  }
   explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
   flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4666,9 +4841,11 @@ struct ResizeNearestNeighborOptionsBuilder
 };
 
 inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+                                   bool half_pixel_centers = false)
 {
   ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+  builder_.add_half_pixel_centers(half_pixel_centers);
   builder_.add_align_corners(align_corners);
   return builder_.Finish();
 }
@@ -4701,7 +4878,6 @@ struct CallOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
   flatbuffers::Offset<CallOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4736,7 +4912,6 @@ struct PadOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
   flatbuffers::Offset<PadOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4769,7 +4944,6 @@ struct PadV2OptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
   flatbuffers::Offset<PadV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4815,7 +4989,6 @@ struct ReshapeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
   flatbuffers::Offset<ReshapeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4859,7 +5032,6 @@ struct SpaceToBatchNDOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
   flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4893,7 +5065,6 @@ struct BatchToSpaceNDOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
   flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4951,7 +5122,6 @@ struct SkipGramOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
   flatbuffers::Offset<SkipGramOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4999,7 +5169,6 @@ struct SpaceToDepthOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
   flatbuffers::Offset<SpaceToDepthOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5044,7 +5213,6 @@ struct DepthToSpaceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
   flatbuffers::Offset<DepthToSpaceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5066,17 +5234,20 @@ struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   typedef SubOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_POT_SCALE_INT16 = 6
   };
   circle::ActivationFunctionType fused_activation_function() const
   {
     return static_cast<circle::ActivationFunctionType>(
       GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
   }
 };
 
@@ -5090,11 +5261,15 @@ struct SubOptionsBuilder
     fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_pot_scale_int16(bool pot_scale_int16)
+  {
+    fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+                             1);
+  }
   explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SubOptionsBuilder &operator=(const SubOptionsBuilder &);
   flatbuffers::Offset<SubOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5105,9 +5280,11 @@ struct SubOptionsBuilder
 
 inline flatbuffers::Offset<SubOptions> CreateSubOptions(
   flatbuffers::FlatBufferBuilder &_fbb,
-  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE,
+  bool pot_scale_int16 = true)
 {
   SubOptionsBuilder builder_(_fbb);
+  builder_.add_pot_scale_int16(pot_scale_int16);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
@@ -5145,7 +5322,6 @@ struct DivOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DivOptionsBuilder &operator=(const DivOptionsBuilder &);
   flatbuffers::Offset<DivOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5181,7 +5357,6 @@ struct TopKV2OptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
   flatbuffers::Offset<TopKV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5228,7 +5403,6 @@ struct EmbeddingLookupSparseOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
   flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5251,13 +5425,15 @@ struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   typedef GatherOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_AXIS = 4
+    VT_AXIS = 4,
+    VT_BATCH_DIMS = 6
   };
   int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
-           verifier.EndTable();
+           VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
   }
 };
 
@@ -5267,11 +5443,14 @@ struct GatherOptionsBuilder
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+  void add_batch_dims(int32_t batch_dims)
+  {
+    fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+  }
   explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
   flatbuffers::Offset<GatherOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5280,10 +5459,11 @@ struct GatherOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                                                              int32_t axis = 0)
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
 {
   GatherOptionsBuilder builder_(_fbb);
+  builder_.add_batch_dims(batch_dims);
   builder_.add_axis(axis);
   return builder_.Finish();
 }
@@ -5306,7 +5486,6 @@ struct TransposeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
   flatbuffers::Offset<TransposeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5340,7 +5519,6 @@ struct ExpOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
   flatbuffers::Offset<ExpOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5373,7 +5551,6 @@ struct CosOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CosOptionsBuilder &operator=(const CosOptionsBuilder &);
   flatbuffers::Offset<CosOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5416,7 +5593,6 @@ struct ReducerOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
   flatbuffers::Offset<ReducerOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5464,7 +5640,6 @@ struct SqueezeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
   flatbuffers::Offset<SqueezeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5518,7 +5693,6 @@ struct SplitOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
   flatbuffers::Offset<SplitOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5563,7 +5737,6 @@ struct SplitVOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
   flatbuffers::Offset<SplitVOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5635,7 +5808,6 @@ struct StridedSliceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
   flatbuffers::Offset<StridedSliceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5676,7 +5848,6 @@ struct LogSoftmaxOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
   flatbuffers::Offset<LogSoftmaxOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5732,7 +5903,6 @@ struct CastOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CastOptionsBuilder &operator=(const CastOptionsBuilder &);
   flatbuffers::Offset<CastOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5770,7 +5940,6 @@ struct DequantizeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
   flatbuffers::Offset<DequantizeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5804,7 +5973,6 @@ struct MaximumMinimumOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
   flatbuffers::Offset<MaximumMinimumOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5838,7 +6006,6 @@ struct TileOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TileOptionsBuilder &operator=(const TileOptionsBuilder &);
   flatbuffers::Offset<TileOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5884,7 +6051,6 @@ struct ArgMaxOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
   flatbuffers::Offset<ArgMaxOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5933,7 +6099,6 @@ struct ArgMinOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
   flatbuffers::Offset<ArgMinOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5969,7 +6134,6 @@ struct GreaterOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
   flatbuffers::Offset<GreaterOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6003,7 +6167,6 @@ struct GreaterEqualOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
   flatbuffers::Offset<GreaterEqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6037,7 +6200,6 @@ struct LessOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LessOptionsBuilder &operator=(const LessOptionsBuilder &);
   flatbuffers::Offset<LessOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6070,7 +6232,6 @@ struct LessEqualOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
   flatbuffers::Offset<LessEqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6104,7 +6265,6 @@ struct NegOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  NegOptionsBuilder &operator=(const NegOptionsBuilder &);
   flatbuffers::Offset<NegOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6137,7 +6297,6 @@ struct SelectOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
   flatbuffers::Offset<SelectOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6170,7 +6329,6 @@ struct SliceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
   flatbuffers::Offset<SliceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6229,7 +6387,6 @@ struct TransposeConvOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
   flatbuffers::Offset<TransposeConvOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6268,7 +6425,6 @@ struct ExpandDimsOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
   flatbuffers::Offset<ExpandDimsOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6313,7 +6469,6 @@ struct SparseToDenseOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
   flatbuffers::Offset<SparseToDenseOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6348,7 +6503,6 @@ struct EqualOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
   flatbuffers::Offset<EqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6381,7 +6535,6 @@ struct NotEqualOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
   flatbuffers::Offset<NotEqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6428,7 +6581,6 @@ struct ShapeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
   flatbuffers::Offset<ShapeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6464,7 +6616,6 @@ struct RankOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  RankOptionsBuilder &operator=(const RankOptionsBuilder &);
   flatbuffers::Offset<RankOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6497,7 +6648,6 @@ struct PowOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  PowOptionsBuilder &operator=(const PowOptionsBuilder &);
   flatbuffers::Offset<PowOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6554,7 +6704,6 @@ struct FakeQuantOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
   flatbuffers::Offset<FakeQuantOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6606,7 +6755,6 @@ struct PackOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  PackOptionsBuilder &operator=(const PackOptionsBuilder &);
   flatbuffers::Offset<PackOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6642,7 +6790,6 @@ struct LogicalOrOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
   flatbuffers::Offset<LogicalOrOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6683,7 +6830,6 @@ struct OneHotOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
   flatbuffers::Offset<OneHotOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6718,7 +6864,6 @@ struct AbsOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
   flatbuffers::Offset<AbsOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6751,7 +6896,6 @@ struct HardSwishOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
   flatbuffers::Offset<HardSwishOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6785,7 +6929,6 @@ struct LogicalAndOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
   flatbuffers::Offset<LogicalAndOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6819,7 +6962,6 @@ struct LogicalNotOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
   flatbuffers::Offset<LogicalNotOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6863,7 +7005,6 @@ struct UnpackOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
   flatbuffers::Offset<UnpackOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6899,7 +7040,6 @@ struct FloorDivOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
   flatbuffers::Offset<FloorDivOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6933,7 +7073,6 @@ struct SquareOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
   flatbuffers::Offset<SquareOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6966,7 +7105,6 @@ struct ZerosLikeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
   flatbuffers::Offset<ZerosLikeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7000,7 +7138,6 @@ struct FillOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FillOptionsBuilder &operator=(const FillOptionsBuilder &);
   flatbuffers::Offset<FillOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7033,7 +7170,6 @@ struct FloorModOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
   flatbuffers::Offset<FloorModOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7067,7 +7203,6 @@ struct RangeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
   flatbuffers::Offset<RangeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7107,7 +7242,6 @@ struct LeakyReluOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
   flatbuffers::Offset<LeakyReluOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7142,7 +7276,6 @@ struct SquaredDifferenceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
   flatbuffers::Offset<SquaredDifferenceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7189,7 +7322,6 @@ struct MirrorPadOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
   flatbuffers::Offset<MirrorPadOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7238,7 +7370,6 @@ struct UniqueOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
   flatbuffers::Offset<UniqueOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7274,7 +7405,6 @@ struct ReverseV2OptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
   flatbuffers::Offset<ReverseV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7308,7 +7438,6 @@ struct AddNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
   flatbuffers::Offset<AddNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7341,7 +7470,6 @@ struct GatherNdOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
   flatbuffers::Offset<GatherNdOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7375,7 +7503,6 @@ struct WhereOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
   flatbuffers::Offset<WhereOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7424,7 +7551,6 @@ struct ReverseSequenceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
   flatbuffers::Offset<ReverseSequenceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7461,7 +7587,6 @@ struct MatrixDiagOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
   flatbuffers::Offset<MatrixDiagOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7495,7 +7620,6 @@ struct QuantizeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
   flatbuffers::Offset<QuantizeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7529,7 +7653,6 @@ struct MatrixSetDiagOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
   flatbuffers::Offset<MatrixSetDiagOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7579,7 +7702,6 @@ struct IfOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  IfOptionsBuilder &operator=(const IfOptionsBuilder &);
   flatbuffers::Offset<IfOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7598,6 +7720,50 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui
   return builder_.Finish();
 }
 
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef CallOnceOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_INIT_SUBGRAPH_INDEX = 4
+  };
+  int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+           verifier.EndTable();
+  }
+};
+
+struct CallOnceOptionsBuilder
+{
+  typedef CallOnceOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_init_subgraph_index(int32_t init_subgraph_index)
+  {
+    fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+  }
+  explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<CallOnceOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CallOnceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+  CallOnceOptionsBuilder builder_(_fbb);
+  builder_.add_init_subgraph_index(init_subgraph_index);
+  return builder_.Finish();
+}
+
 struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
   typedef WhileOptionsBuilder Builder;
@@ -7632,7 +7798,6 @@ struct WhileOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
   flatbuffers::Offset<WhileOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7669,7 +7834,6 @@ struct NonMaxSuppressionV4OptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
   flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7703,7 +7867,6 @@ struct NonMaxSuppressionV5OptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
   flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7737,7 +7900,6 @@ struct ScatterNdOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
   flatbuffers::Offset<ScatterNdOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7771,7 +7933,6 @@ struct SelectV2OptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
   flatbuffers::Offset<SelectV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7805,7 +7966,6 @@ struct DensifyOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
   flatbuffers::Offset<DensifyOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7839,7 +7999,6 @@ struct SegmentSumOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
   flatbuffers::Offset<SegmentSumOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7861,14 +8020,20 @@ struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_ADJOINT_LHS = 4,
-    VT_ADJOINT_RHS = 6
+    VT_ADJOINT_RHS = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
   };
   bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
   bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
-           VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+           VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
   }
 };
 
@@ -7887,11 +8052,15 @@ struct BatchMatMulOptionsBuilder
     fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
                              0);
   }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
+  }
   explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
   flatbuffers::Offset<BatchMatMulOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7902,154 +8071,616 @@ struct BatchMatMulOptionsBuilder
 
 inline flatbuffers::Offset<BatchMatMulOptions>
 CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
-                         bool adjoint_rhs = false)
+                         bool adjoint_rhs = false, bool asymmetric_quantize_inputs = false)
 {
   BatchMatMulOptionsBuilder builder_(_fbb);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
   builder_.add_adjoint_rhs(adjoint_rhs);
   builder_.add_adjoint_lhs(adjoint_lhs);
   return builder_.Finish();
 }
 
-struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  typedef BCQGatherOptionsBuilder Builder;
+  typedef CumsumOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_INPUT_HIDDEN_SIZE = 4,
-    VT_AXIS = 6
+    VT_EXCLUSIVE = 4,
+    VT_REVERSE = 6
   };
-  int32_t input_hidden_size() const { return GetField<int32_t>(VT_INPUT_HIDDEN_SIZE, 0); }
-  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+  bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
-    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INPUT_HIDDEN_SIZE) &&
-           VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+           VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
   }
 };
 
-struct BCQGatherOptionsBuilder
+struct CumsumOptionsBuilder
 {
-  typedef BCQGatherOptions Table;
+  typedef CumsumOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_input_hidden_size(int32_t input_hidden_size)
+  void add_exclusive(bool exclusive)
   {
-    fbb_.AddElement<int32_t>(BCQGatherOptions::VT_INPUT_HIDDEN_SIZE, input_hidden_size, 0);
+    fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
   }
-  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(BCQGatherOptions::VT_AXIS, axis, 0); }
-  explicit BCQGatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  void add_reverse(bool reverse)
+  {
+    fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
+  }
+  explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  BCQGatherOptionsBuilder &operator=(const BCQGatherOptionsBuilder &);
-  flatbuffers::Offset<BCQGatherOptions> Finish()
+  flatbuffers::Offset<CumsumOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BCQGatherOptions>(end);
+    auto o = flatbuffers::Offset<CumsumOptions>(end);
     return o;
   }
 };
 
-inline flatbuffers::Offset<BCQGatherOptions>
-CreateBCQGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t input_hidden_size = 0,
-                       int32_t axis = 0)
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              bool exclusive = false,
+                                                              bool reverse = false)
 {
-  BCQGatherOptionsBuilder builder_(_fbb);
-  builder_.add_axis(axis);
-  builder_.add_input_hidden_size(input_hidden_size);
+  CumsumOptionsBuilder builder_(_fbb);
+  builder_.add_reverse(reverse);
+  builder_.add_exclusive(exclusive);
   return builder_.Finish();
 }
 
-struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  typedef BCQFullyConnectedOptionsBuilder Builder;
-  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
-  {
-    VT_WEIGHTS_HIDDEN_SIZE = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
-  };
-  int32_t weights_hidden_size() const { return GetField<int32_t>(VT_WEIGHTS_HIDDEN_SIZE, 0); }
-  circle::ActivationFunctionType fused_activation_function() const
-  {
-    return static_cast<circle::ActivationFunctionType>(
-      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
-  }
+  typedef BroadcastToOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
-    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_WEIGHTS_HIDDEN_SIZE) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+    return VerifyTableStart(verifier) && verifier.EndTable();
   }
 };
 
-struct BCQFullyConnectedOptionsBuilder
+struct BroadcastToOptionsBuilder
 {
-  typedef BCQFullyConnectedOptions Table;
+  typedef BroadcastToOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_weights_hidden_size(int32_t weights_hidden_size)
+  explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
-    fbb_.AddElement<int32_t>(BCQFullyConnectedOptions::VT_WEIGHTS_HIDDEN_SIZE, weights_hidden_size,
-                             0);
+    start_ = fbb_.StartTable();
   }
-  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+  flatbuffers::Offset<BroadcastToOptions> Finish()
   {
-    fbb_.AddElement<int8_t>(BCQFullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
-                            static_cast<int8_t>(fused_activation_function), 0);
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+    return o;
   }
-  explicit BCQFullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  BroadcastToOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef Rfft2dOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct Rfft2dOptionsBuilder
+{
+  typedef Rfft2dOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  BCQFullyConnectedOptionsBuilder &operator=(const BCQFullyConnectedOptionsBuilder &);
-  flatbuffers::Offset<BCQFullyConnectedOptions> Finish()
+  flatbuffers::Offset<Rfft2dOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BCQFullyConnectedOptions>(end);
+    auto o = flatbuffers::Offset<Rfft2dOptions>(end);
     return o;
   }
 };
 
-inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions(
-  flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
-  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
 {
-  BCQFullyConnectedOptionsBuilder builder_(_fbb);
-  builder_.add_weights_hidden_size(weights_hidden_size);
-  builder_.add_fused_activation_function(fused_activation_function);
+  Rfft2dOptionsBuilder builder_(_fbb);
   return builder_.Finish();
 }
 
-struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  typedef InstanceNormOptionsBuilder Builder;
+  typedef HashtableOptionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_EPSILON = 4,
-    VT_FUSED_ACTIVATION_FUNCTION = 6
+    VT_TABLE_ID = 4,
+    VT_KEY_DTYPE = 6,
+    VT_VALUE_DTYPE = 8
   };
-  float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); }
-  circle::ActivationFunctionType fused_activation_function() const
+  int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+  circle::TensorType key_dtype() const
   {
-    return static_cast<circle::ActivationFunctionType>(
-      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<circle::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+  }
+  circle::TensorType value_dtype() const
+  {
+    return static_cast<circle::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
-    return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_EPSILON) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+           VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+           VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
   }
 };
 
-struct InstanceNormOptionsBuilder
+struct HashtableOptionsBuilder
 {
-  typedef InstanceNormOptions Table;
+  typedef HashtableOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_epsilon(float epsilon)
+  void add_table_id(int32_t table_id)
   {
-    fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f);
+    fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
   }
-  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+  void add_key_dtype(circle::TensorType key_dtype)
+  {
+    fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+  }
+  void add_value_dtype(circle::TensorType value_dtype)
+  {
+    fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+  }
+  explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+                       circle::TensorType key_dtype = circle::TensorType_FLOAT32,
+                       circle::TensorType value_dtype = circle::TensorType_FLOAT32)
+{
+  HashtableOptionsBuilder builder_(_fbb);
+  builder_.add_table_id(table_id);
+  builder_.add_value_dtype(value_dtype);
+  builder_.add_key_dtype(key_dtype);
+  return builder_.Finish();
+}
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableFindOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct HashtableFindOptionsBuilder
+{
+  typedef HashtableFindOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableFindOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  HashtableFindOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableImportOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct HashtableImportOptionsBuilder
+{
+  typedef HashtableImportOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableImportOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  HashtableImportOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableSizeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct HashtableSizeOptionsBuilder
+{
+  typedef HashtableSizeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableSizeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  HashtableSizeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef VarHandleOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_CONTAINER = 4,
+    VT_SHARED_NAME = 6
+  };
+  const flatbuffers::String *container() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+  }
+  const flatbuffers::String *shared_name() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+           verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+           verifier.VerifyString(shared_name()) && verifier.EndTable();
+  }
+};
+
+struct VarHandleOptionsBuilder
+{
+  typedef VarHandleOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_container(flatbuffers::Offset<flatbuffers::String> container)
+  {
+    fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+  }
+  void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+  {
+    fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+  }
+  explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<VarHandleOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<VarHandleOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                       flatbuffers::Offset<flatbuffers::String> container = 0,
+                       flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+  VarHandleOptionsBuilder builder_(_fbb);
+  builder_.add_shared_name(shared_name);
+  builder_.add_container(container);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+                             const char *shared_name = nullptr)
+{
+  auto container__ = container ? _fbb.CreateString(container) : 0;
+  auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+  return circle::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef ReadVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct ReadVariableOptionsBuilder
+{
+  typedef ReadVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ReadVariableOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  ReadVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef AssignVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct AssignVariableOptionsBuilder
+{
+  typedef AssignVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AssignVariableOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  AssignVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef RandomOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_SEED = 4,
+    VT_SEED2 = 6
+  };
+  int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); }
+  int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) &&
+           VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable();
+  }
+};
+
+struct RandomOptionsBuilder
+{
+  typedef RandomOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); }
+  void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); }
+  explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<RandomOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RandomOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              int32_t seed = 0, int32_t seed2 = 0)
+{
+  RandomOptionsBuilder builder_(_fbb);
+  builder_.add_seed2(seed2);
+  builder_.add_seed(seed);
+  return builder_.Finish();
+}
+
+struct BCQGatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef BCQGatherOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_INPUT_HIDDEN_SIZE = 4,
+    VT_AXIS = 6
+  };
+  int32_t input_hidden_size() const { return GetField<int32_t>(VT_INPUT_HIDDEN_SIZE, 0); }
+  int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INPUT_HIDDEN_SIZE) &&
+           VerifyField<int32_t>(verifier, VT_AXIS) && verifier.EndTable();
+  }
+};
+
+struct BCQGatherOptionsBuilder
+{
+  typedef BCQGatherOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_input_hidden_size(int32_t input_hidden_size)
+  {
+    fbb_.AddElement<int32_t>(BCQGatherOptions::VT_INPUT_HIDDEN_SIZE, input_hidden_size, 0);
+  }
+  void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(BCQGatherOptions::VT_AXIS, axis, 0); }
+  explicit BCQGatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BCQGatherOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BCQGatherOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BCQGatherOptions>
+CreateBCQGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t input_hidden_size = 0,
+                       int32_t axis = 0)
+{
+  BCQGatherOptionsBuilder builder_(_fbb);
+  builder_.add_axis(axis);
+  builder_.add_input_hidden_size(input_hidden_size);
+  return builder_.Finish();
+}
+
+struct BCQFullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef BCQFullyConnectedOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_WEIGHTS_HIDDEN_SIZE = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6
+  };
+  int32_t weights_hidden_size() const { return GetField<int32_t>(VT_WEIGHTS_HIDDEN_SIZE, 0); }
+  circle::ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<circle::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_WEIGHTS_HIDDEN_SIZE) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct BCQFullyConnectedOptionsBuilder
+{
+  typedef BCQFullyConnectedOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_weights_hidden_size(int32_t weights_hidden_size)
+  {
+    fbb_.AddElement<int32_t>(BCQFullyConnectedOptions::VT_WEIGHTS_HIDDEN_SIZE, weights_hidden_size,
+                             0);
+  }
+  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(BCQFullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  explicit BCQFullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BCQFullyConnectedOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BCQFullyConnectedOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BCQFullyConnectedOptions> CreateBCQFullyConnectedOptions(
+  flatbuffers::FlatBufferBuilder &_fbb, int32_t weights_hidden_size = 0,
+  circle::ActivationFunctionType fused_activation_function = circle::ActivationFunctionType_NONE)
+{
+  BCQFullyConnectedOptionsBuilder builder_(_fbb);
+  builder_.add_weights_hidden_size(weights_hidden_size);
+  builder_.add_fused_activation_function(fused_activation_function);
+  return builder_.Finish();
+}
+
+struct InstanceNormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef InstanceNormOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_EPSILON = 4,
+    VT_FUSED_ACTIVATION_FUNCTION = 6
+  };
+  float epsilon() const { return GetField<float>(VT_EPSILON, 0.0f); }
+  circle::ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<circle::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<float>(verifier, VT_EPSILON) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+  }
+};
+
+struct InstanceNormOptionsBuilder
+{
+  typedef InstanceNormOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_epsilon(float epsilon)
+  {
+    fbb_.AddElement<float>(InstanceNormOptions::VT_EPSILON, epsilon, 0.0f);
+  }
+  void add_fused_activation_function(circle::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(InstanceNormOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -8058,7 +8689,6 @@ struct InstanceNormOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  InstanceNormOptionsBuilder &operator=(const InstanceNormOptionsBuilder &);
   flatbuffers::Offset<InstanceNormOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8082,24 +8712,28 @@ struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   typedef OperatorCodeBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_BUILTIN_CODE = 4,
+    VT_DEPRECATED_BUILTIN_CODE = 4,
     VT_CUSTOM_CODE = 6,
-    VT_VERSION = 8
+    VT_VERSION = 8,
+    VT_BUILTIN_CODE = 10
   };
-  circle::BuiltinOperator builtin_code() const
-  {
-    return static_cast<circle::BuiltinOperator>(GetField<uint8_t>(VT_BUILTIN_CODE, 0));
-  }
+  int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
   const flatbuffers::String *custom_code() const
   {
     return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
   }
   int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+  circle::BuiltinOperator builtin_code() const
+  {
+    return static_cast<circle::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+  }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
-    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_BUILTIN_CODE) &&
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
            VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
-           VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+           VerifyField<int32_t>(verifier, VT_VERSION) &&
+           VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
   }
 };
 
@@ -8108,9 +8742,9 @@ struct OperatorCodeBuilder
   typedef OperatorCode Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_builtin_code(circle::BuiltinOperator builtin_code)
+  void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
   {
-    fbb_.AddElement<uint8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<uint8_t>(builtin_code), 0);
+    fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
   }
   void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
   {
@@ -8120,11 +8754,14 @@ struct OperatorCodeBuilder
   {
     fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
   }
+  void add_builtin_code(circle::BuiltinOperator builtin_code)
+  {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+  }
   explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
   flatbuffers::Offset<OperatorCode> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8134,24 +8771,26 @@ struct OperatorCodeBuilder
 };
 
 inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
-                   circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD,
-                   flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+                   flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+                   circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
 {
   OperatorCodeBuilder builder_(_fbb);
+  builder_.add_builtin_code(builtin_code);
   builder_.add_version(version);
   builder_.add_custom_code(custom_code);
-  builder_.add_builtin_code(builtin_code);
+  builder_.add_deprecated_builtin_code(deprecated_builtin_code);
   return builder_.Finish();
 }
 
 inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
-                         circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD,
-                         const char *custom_code = nullptr, int32_t version = 1)
+CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+                         const char *custom_code = nullptr, int32_t version = 1,
+                         circle::BuiltinOperator builtin_code = circle::BuiltinOperator_ADD)
 {
   auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
-  return circle::CreateOperatorCode(_fbb, builtin_code, custom_code__, version);
+  return circle::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+                                    builtin_code);
 }
 
 struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
@@ -8796,6 +9435,84 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
              ? static_cast<const circle::BatchMatMulOptions *>(builtin_options())
              : nullptr;
   }
+  const circle::CumsumOptions *builtin_options_as_CumsumOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_CumsumOptions
+             ? static_cast<const circle::CumsumOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::CallOnceOptions *builtin_options_as_CallOnceOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_CallOnceOptions
+             ? static_cast<const circle::CallOnceOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_BroadcastToOptions
+             ? static_cast<const circle::BroadcastToOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_Rfft2dOptions
+             ? static_cast<const circle::Rfft2dOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::Conv3DOptions *builtin_options_as_Conv3DOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_Conv3DOptions
+             ? static_cast<const circle::Conv3DOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::HashtableOptions *builtin_options_as_HashtableOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_HashtableOptions
+             ? static_cast<const circle::HashtableOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_HashtableFindOptions
+             ? static_cast<const circle::HashtableFindOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_HashtableImportOptions
+             ? static_cast<const circle::HashtableImportOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_HashtableSizeOptions
+             ? static_cast<const circle::HashtableSizeOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::VarHandleOptions *builtin_options_as_VarHandleOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_VarHandleOptions
+             ? static_cast<const circle::VarHandleOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_ReadVariableOptions
+             ? static_cast<const circle::ReadVariableOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_AssignVariableOptions
+             ? static_cast<const circle::AssignVariableOptions *>(builtin_options())
+             : nullptr;
+  }
+  const circle::RandomOptions *builtin_options_as_RandomOptions() const
+  {
+    return builtin_options_type() == circle::BuiltinOptions_RandomOptions
+             ? static_cast<const circle::RandomOptions *>(builtin_options())
+             : nullptr;
+  }
   const circle::BCQGatherOptions *builtin_options_as_BCQGatherOptions() const
   {
     return builtin_options_type() == circle::BuiltinOptions_BCQGatherOptions
@@ -9497,6 +10214,92 @@ Operator::builtin_options_as<circle::BatchMatMulOptions>() const
   return builtin_options_as_BatchMatMulOptions();
 }
 
+template <>
+inline const circle::CumsumOptions *Operator::builtin_options_as<circle::CumsumOptions>() const
+{
+  return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const circle::CallOnceOptions *Operator::builtin_options_as<circle::CallOnceOptions>() const
+{
+  return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const circle::BroadcastToOptions *
+Operator::builtin_options_as<circle::BroadcastToOptions>() const
+{
+  return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const circle::Rfft2dOptions *Operator::builtin_options_as<circle::Rfft2dOptions>() const
+{
+  return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const circle::Conv3DOptions *Operator::builtin_options_as<circle::Conv3DOptions>() const
+{
+  return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const circle::HashtableOptions *
+Operator::builtin_options_as<circle::HashtableOptions>() const
+{
+  return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const circle::HashtableFindOptions *
+Operator::builtin_options_as<circle::HashtableFindOptions>() const
+{
+  return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const circle::HashtableImportOptions *
+Operator::builtin_options_as<circle::HashtableImportOptions>() const
+{
+  return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const circle::HashtableSizeOptions *
+Operator::builtin_options_as<circle::HashtableSizeOptions>() const
+{
+  return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const circle::VarHandleOptions *
+Operator::builtin_options_as<circle::VarHandleOptions>() const
+{
+  return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const circle::ReadVariableOptions *
+Operator::builtin_options_as<circle::ReadVariableOptions>() const
+{
+  return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const circle::AssignVariableOptions *
+Operator::builtin_options_as<circle::AssignVariableOptions>() const
+{
+  return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const circle::RandomOptions *Operator::builtin_options_as<circle::RandomOptions>() const
+{
+  return builtin_options_as_RandomOptions();
+}
+
 template <>
 inline const circle::BCQGatherOptions *
 Operator::builtin_options_as<circle::BCQGatherOptions>() const
@@ -9566,7 +10369,6 @@ struct OperatorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  OperatorBuilder &operator=(const OperatorBuilder &);
   flatbuffers::Offset<Operator> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -9705,7 +10507,6 @@ struct SubGraphBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SubGraphBuilder &operator=(const SubGraphBuilder &);
   flatbuffers::Offset<SubGraph> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -9781,7 +10582,6 @@ struct BufferBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BufferBuilder &operator=(const BufferBuilder &);
   flatbuffers::Offset<Buffer> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -9845,7 +10645,6 @@ struct MetadataBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MetadataBuilder &operator=(const MetadataBuilder &);
   flatbuffers::Offset<Metadata> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -9872,6 +10671,168 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe
   return circle::CreateMetadata(_fbb, name__, buffer);
 }
 
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef TensorMapBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_NAME = 4,
+    VT_TENSOR_INDEX = 6
+  };
+  const flatbuffers::String *name() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+           verifier.EndTable();
+  }
+};
+
+struct TensorMapBuilder
+{
+  typedef TensorMap Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_name(flatbuffers::Offset<flatbuffers::String> name)
+  {
+    fbb_.AddOffset(TensorMap::VT_NAME, name);
+  }
+  void add_tensor_index(uint32_t tensor_index)
+  {
+    fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+  }
+  explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<TensorMap> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TensorMap>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+                flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+  TensorMapBuilder builder_(_fbb);
+  builder_.add_tensor_index(tensor_index);
+  builder_.add_name(name);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+                                                            const char *name = nullptr,
+                                                            uint32_t tensor_index = 0)
+{
+  auto name__ = name ? _fbb.CreateString(name) : 0;
+  return circle::CreateTensorMap(_fbb, name__, tensor_index);
+}
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef SignatureDefBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_INPUTS = 4,
+    VT_OUTPUTS = 6,
+    VT_SIGNATURE_KEY = 8,
+    VT_SUBGRAPH_INDEX = 12
+  };
+  const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *inputs() const
+  {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+      VT_INPUTS);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *outputs() const
+  {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>> *>(
+      VT_OUTPUTS);
+  }
+  const flatbuffers::String *signature_key() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+  }
+  uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+           verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+           verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+           verifier.VerifyString(signature_key()) &&
+           VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+  }
+};
+
+struct SignatureDefBuilder
+{
+  typedef SignatureDef Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_inputs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs)
+  {
+    fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+  }
+  void add_outputs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs)
+  {
+    fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+  }
+  void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+  {
+    fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+  }
+  void add_subgraph_index(uint32_t subgraph_index)
+  {
+    fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+  }
+  explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SignatureDef> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SignatureDef>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> inputs = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::TensorMap>>> outputs = 0,
+  flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+  SignatureDefBuilder builder_(_fbb);
+  builder_.add_subgraph_index(subgraph_index);
+  builder_.add_signature_key(signature_key);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  const std::vector<flatbuffers::Offset<circle::TensorMap>> *inputs = nullptr,
+  const std::vector<flatbuffers::Offset<circle::TensorMap>> *outputs = nullptr,
+  const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+  auto inputs__ = inputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*inputs) : 0;
+  auto outputs__ =
+    outputs ? _fbb.CreateVector<flatbuffers::Offset<circle::TensorMap>>(*outputs) : 0;
+  auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+  return circle::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__, subgraph_index);
+}
+
 struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
   typedef ModelBuilder Builder;
@@ -9883,7 +10844,8 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_DESCRIPTION = 10,
     VT_BUFFERS = 12,
     VT_METADATA_BUFFER = 14,
-    VT_METADATA = 16
+    VT_METADATA = 16,
+    VT_SIGNATURE_DEFS = 18
   };
   uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
   const flatbuffers::Vector<flatbuffers::Offset<circle::OperatorCode>> *operator_codes() const
@@ -9913,6 +10875,11 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>> *>(
       VT_METADATA);
   }
+  const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs() const
+  {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>> *>(
+      VT_SIGNATURE_DEFS);
+  }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<uint32_t>(verifier, VT_VERSION) &&
@@ -9924,7 +10891,9 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
            verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
            VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
            VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
-           verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+           verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+           verifier.VerifyVector(signature_defs()) &&
+           verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
   }
 };
 
@@ -9963,11 +10932,16 @@ struct ModelBuilder
   {
     fbb_.AddOffset(Model::VT_METADATA, metadata);
   }
+  void add_signature_defs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+      signature_defs)
+  {
+    fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+  }
   explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ModelBuilder &operator=(const ModelBuilder &);
   flatbuffers::Offset<Model> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -9984,9 +10958,12 @@ inline flatbuffers::Offset<Model> CreateModel(
   flatbuffers::Offset<flatbuffers::String> description = 0,
   flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Buffer>>> buffers = 0,
   flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0)
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::Metadata>>> metadata = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<circle::SignatureDef>>>
+    signature_defs = 0)
 {
   ModelBuilder builder_(_fbb);
+  builder_.add_signature_defs(signature_defs);
   builder_.add_metadata(metadata);
   builder_.add_metadata_buffer(metadata_buffer);
   builder_.add_buffers(buffers);
@@ -10004,7 +10981,8 @@ inline flatbuffers::Offset<Model> CreateModelDirect(
   const char *description = nullptr,
   const std::vector<flatbuffers::Offset<circle::Buffer>> *buffers = nullptr,
   const std::vector<int32_t> *metadata_buffer = nullptr,
-  const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr)
+  const std::vector<flatbuffers::Offset<circle::Metadata>> *metadata = nullptr,
+  const std::vector<flatbuffers::Offset<circle::SignatureDef>> *signature_defs = nullptr)
 {
   auto operator_codes__ =
     operator_codes ? _fbb.CreateVector<flatbuffers::Offset<circle::OperatorCode>>(*operator_codes)
@@ -10016,8 +10994,11 @@ inline flatbuffers::Offset<Model> CreateModelDirect(
   auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
   auto metadata__ =
     metadata ? _fbb.CreateVector<flatbuffers::Offset<circle::Metadata>>(*metadata) : 0;
+  auto signature_defs__ =
+    signature_defs ? _fbb.CreateVector<flatbuffers::Offset<circle::SignatureDef>>(*signature_defs)
+                   : 0;
   return circle::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__, buffers__,
-                             metadata_buffer__, metadata__);
+                             metadata_buffer__, metadata__, signature_defs__);
 }
 
 inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
@@ -10621,6 +11602,71 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const circle::BatchMatMulOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
+    case BuiltinOptions_CumsumOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::CumsumOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CallOnceOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::CallOnceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BroadcastToOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::BroadcastToOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Rfft2dOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::Rfft2dOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Conv3DOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::Conv3DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::HashtableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableFindOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::HashtableFindOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableImportOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::HashtableImportOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableSizeOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::HashtableSizeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_VarHandleOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::VarHandleOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReadVariableOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::ReadVariableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AssignVariableOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::AssignVariableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RandomOptions:
+    {
+      auto ptr = reinterpret_cast<const circle::RandomOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
     case BuiltinOptions_BCQGatherOptions:
     {
       auto ptr = reinterpret_cast<const circle::BCQGatherOptions *>(obj);
index 9ecb7d190fe0a6aaa4da372e1f0e87ed4015a4da..ba739f6180c8afab635867199f8ebda87d4169bd 100644 (file)
@@ -82,6 +82,27 @@ uint32_t getUint32Scalar(Operands &operands, const OperandIndex index)
   return static_cast<uint32_t>(int32_value);
 }
 
+Activation getActivation(Operands &operands, const OperandIndex index)
+{
+  switch (operands.at(index).asScalar<int32_t>())
+  {
+    case 0:
+      return Activation::NONE;
+    case 1:
+      return Activation::RELU;
+    case 2:
+      return Activation::RELU1;
+    case 3:
+      return Activation::RELU6;
+    case 4:
+      return Activation::TANH;
+    case 6:
+      return Activation::SIGMOID;
+    default:
+      throw std::runtime_error("Unsupported activation type");
+  }
+}
+
 OperationFactory::Generator
 getElementwiseActivationGenerator(const onert::ir::operation::ElementwiseActivation::Type op_type,
                                   float alpha = 0.f, float beta = 0.f)
@@ -519,10 +540,6 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_CAST] =
     getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::CAST);
 
-  // ANEURALNETWORKS_CAST_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_CAST_EX
-  _map[ANEURALNETWORKS_CAST_EX] = _map[ANEURALNETWORKS_CAST];
-
   _map[ANEURALNETWORKS_CONV_2D] = [](const OperationFactory::Param &init_param,
                                      Operands &operands) {
     using operation::Conv2D;
@@ -651,10 +668,6 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_REDUCE_SUM] =
     getReduceGenerator(onert::ir::operation::Reduce::ReduceType::SUM);
 
-  // ANEURALNETWORKS_REDUCE_SUM_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_REDUCE_SUM_EX
-  _map[ANEURALNETWORKS_REDUCE_SUM_EX] = _map[ANEURALNETWORKS_REDUCE_SUM];
-
   _map[ANEURALNETWORKS_SUB] =
     getBinaryArithmeticGenerator(onert::ir::operation::BinaryArithmetic::ArithmeticType::SUB);
 
@@ -770,10 +783,6 @@ OperationFactory::OperationFactory()
 
   _map[ANEURALNETWORKS_EXP] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::EXP);
 
-  // ANEURALNETWORKS_EXP_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_EXP_EX
-  _map[ANEURALNETWORKS_EXP_EX] = _map[ANEURALNETWORKS_EXP];
-
   // Each input should be interpreted as follows:
   //  0 -> Input Tensor Index
   //  1 -> Axis Tensor Index
@@ -791,52 +800,6 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_EQUAL] =
     getComparisonGenerator(operation::Comparison::ComparisonType::Equal);
 
-  // ANEURALNETWORKS_GREATER_EQUAL_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_GREATER_EQUAL_EX
-  _map[ANEURALNETWORKS_GREATER_EQUAL_EX] = [](const OperationFactory::Param &init_param,
-                                              Operands &operands) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::GreaterEqual;
-
-    // Output operand type must be boolean
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
-  // ANEURALNETWORKS_LESS_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_LESS_EX
-  _map[ANEURALNETWORKS_LESS_EX] = [](const OperationFactory::Param &init_param,
-                                     Operands &operands) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::Less;
-
-    // Output operand type must be boolean
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
   _map[ANEURALNETWORKS_REDUCE_ALL] =
     getReduceGenerator(onert::ir::operation::Reduce::ReduceType::ALL);
 
@@ -846,61 +809,9 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_REDUCE_MAX] =
     getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MAX);
 
-  // ANEURALNETWORKS_REDUCE_MAX_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_REDUCE_MAX_EX
-  _map[ANEURALNETWORKS_REDUCE_MAX_EX] = _map[ANEURALNETWORKS_REDUCE_MAX];
-
-  // ANEURALNETWORKS_NOT_EQUAL_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_NOT_EQUAL_EX
-  _map[ANEURALNETWORKS_NOT_EQUAL_EX] = [](const OperationFactory::Param &init_param,
-                                          Operands &operands) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input1 Tensor Index
-    //  1 -> input2 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::NotEqual;
-
-    // Output operand type must be boolean
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
   _map[ANEURALNETWORKS_LOGICAL_AND] =
     getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND);
 
-  // ANEURALNETWORKS_LOGICAL_AND_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_LOGICAL_AND_EX
-  _map[ANEURALNETWORKS_LOGICAL_AND_EX] = [](const OperationFactory::Param &init_param,
-                                            Operands &operands) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    // This operation's operands must be boolean type.
-    replaceDataType(operands, inputs.at(0), DataType::BOOL8);
-    replaceDataType(operands, inputs.at(1), DataType::BOOL8);
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    operation::ElementwiseBinary::Param param;
-    param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_AND;
-
-    return new operation::ElementwiseBinary{inputs, outputs, param};
-  };
-
   _map[ANEURALNETWORKS_RSQRT] =
     getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::RSQRT);
 
@@ -919,24 +830,7 @@ OperationFactory::OperationFactory()
     return new operation::Select{inputs, outputs};
   };
 
-  _map[ANEURALNETWORKS_SELECT_V2_EX] = [](const OperationFactory::Param &init_param, Operands &) {
-    assert(init_param.input_count == 3 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> Condition Tensor Index
-    //  1 -> Input X(true) Tensor Index
-    //  2 -> Input Y(false) Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1], init_param.inputs[2]};
-
-    return new operation::Select{inputs, outputs};
-  };
-
-  // ANEURALNETWORKS_RSQRT_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_RSQRT_EX
-  _map[ANEURALNETWORKS_RSQRT_EX] = _map[ANEURALNETWORKS_RSQRT];
+  _map[ANEURALNETWORKS_SELECT_V2_EX] = _map[ANEURALNETWORKS_SELECT];
 
   _map[ANEURALNETWORKS_RELU] =
     getElementwiseActivationGenerator(onert::ir::operation::ElementwiseActivation::Type::RELU,
@@ -1141,10 +1035,6 @@ OperationFactory::OperationFactory()
     return new operation::PReLU{inputs, outputs};
   };
 
-  // ANEURALNETWORKS_PRELU_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_PRELU_EX
-  _map[ANEURALNETWORKS_PRELU_EX] = _map[ANEURALNETWORKS_PRELU];
-
   _map[ANEURALNETWORKS_TRANSPOSE_CONV_EX] = [](const OperationFactory::Param &init_param,
                                                Operands &operands) {
     assert(init_param.input_count == 6 && init_param.output_count == 1);
@@ -1178,64 +1068,12 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_SQRT] =
     getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::SQRT);
 
-  // ANEURALNETWORKS_SQRT_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_SQRT_EX
-  _map[ANEURALNETWORKS_SQRT_EX] = _map[ANEURALNETWORKS_SQRT];
-
   _map[ANEURALNETWORKS_LOGICAL_OR] =
     getElementwiseBinaryGenerator(operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR);
 
-  // ANEURALNETWORKS_LOGICAL_OR_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_LOGICAL_OR_EX
-  _map[ANEURALNETWORKS_LOGICAL_OR_EX] = [](const OperationFactory::Param &init_param,
-                                           Operands &operands) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    // This operation's operands must be boolean type.
-    replaceDataType(operands, inputs.at(0), DataType::BOOL8);
-    replaceDataType(operands, inputs.at(1), DataType::BOOL8);
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    operation::ElementwiseBinary::Param param;
-    param.op_type = operation::ElementwiseBinary::ElementwiseBinaryType::LOGICAL_OR;
-
-    return new operation::ElementwiseBinary{inputs, outputs, param};
-  };
-
   _map[ANEURALNETWORKS_LOGICAL_NOT] =
     getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::LOGICAL_NOT);
 
-  // ANEURALNETWORKS_LOGICAL_NOT_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_LOGICAL_NOT_EX
-  _map[ANEURALNETWORKS_LOGICAL_NOT_EX] = [](const OperationFactory::Param &init_param,
-                                            Operands &operands) {
-    assert(init_param.input_count == 1 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0]};
-
-    // This operation's operands must be boolean type.
-    replaceDataType(operands, inputs.at(0), DataType::BOOL8);
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    operation::ElementwiseUnary::Param param;
-    param.op_type = operation::ElementwiseUnary::Type::LOGICAL_NOT;
-
-    return new operation::ElementwiseUnary{inputs, outputs, param};
-  };
-
   _map[ANEURALNETWORKS_LSTM] = [](const OperationFactory::Param &init_param, Operands &operands) {
     assert(init_param.input_count == 23 && init_param.output_count == 4);
 
@@ -1280,31 +1118,7 @@ OperationFactory::OperationFactory()
     }
 
     operation::LSTM::Param param;
-    const auto activation_index = OperandIndex{init_param.inputs[20]};
-    switch (operands.at(activation_index).asScalar<int32_t>())
-    {
-      case 0:
-        param.activation = Activation::NONE;
-        break;
-      case 1:
-        param.activation = Activation::RELU;
-        break;
-      case 2:
-        param.activation = Activation::RELU1;
-        break;
-      case 3:
-        param.activation = Activation::RELU6;
-        break;
-      case 4:
-        param.activation = Activation::TANH;
-        break;
-      case 6:
-        param.activation = Activation::SIGMOID;
-        break;
-      default:
-        throw std::runtime_error("Unsupported activation type");
-        break;
-    }
+    param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]});
     param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
     param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
     // This is initialization to prevent warning or error by static code analyzer. LSTM operation
@@ -1378,31 +1192,7 @@ OperationFactory::OperationFactory()
                                  output_index};
 
     operation::LSTM::Param param;
-    const auto activation_index = OperandIndex{init_param.inputs[20]};
-    switch (operands.at(activation_index).asScalar<int32_t>())
-    {
-      case 0:
-        param.activation = Activation::NONE;
-        break;
-      case 1:
-        param.activation = Activation::RELU;
-        break;
-      case 2:
-        param.activation = Activation::RELU1;
-        break;
-      case 3:
-        param.activation = Activation::RELU6;
-        break;
-      case 4:
-        param.activation = Activation::TANH;
-        break;
-      case 6:
-        param.activation = Activation::SIGMOID;
-        break;
-      default:
-        throw std::runtime_error("Unsupported activation type");
-        break;
-    }
+    param.activation = getActivation(operands, OperandIndex{init_param.inputs[20]});
     param.cell_threshold = operands.at(OperandIndex{init_param.inputs[21]}).asScalar<float>();
     param.projection_threshold = operands.at(OperandIndex{init_param.inputs[22]}).asScalar<float>();
     param.time_major = operands.at(OperandIndex{init_param.inputs[23]}).asScalar<bool>();
@@ -1410,29 +1200,6 @@ OperationFactory::OperationFactory()
     return new operation::LSTM{inputs, outputs, param};
   };
 
-  // ANEURALNETWORKS_EQUAL_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_EQUAL_EX
-  _map[ANEURALNETWORKS_EQUAL_EX] = [](const OperationFactory::Param &init_param,
-                                      Operands &operands) {
-    assert(init_param.input_count == 2 && init_param.output_count == 1);
-
-    OperandIndexSequence outputs{init_param.outputs[0]};
-
-    // Each input should be interpreted as follows:
-    //
-    //  0 -> input0 Tensor Index
-    //  1 -> input1 Tensor Index
-    OperandIndexSequence inputs{init_param.inputs[0], init_param.inputs[1]};
-
-    operation::Comparison::Param param;
-    param.comparison_type = operation::Comparison::ComparisonType::Equal;
-
-    // Output operand type must be boolean
-    replaceDataType(operands, outputs.at(0), DataType::BOOL8);
-
-    return new operation::Comparison{inputs, outputs, param};
-  };
-
   _map[ANEURALNETWORKS_SQUARED_DIFFERENCE_EX] = [](const OperationFactory::Param &init_param,
                                                    Operands &) {
     assert(init_param.input_count == 2 && init_param.output_count == 1);
@@ -1470,10 +1237,6 @@ OperationFactory::OperationFactory()
     return new operation::TopKV2{inputs, outputs, param};
   };
 
-  // ANEURALNETWORKS_CAST_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_CAST_EX
-  _map[ANEURALNETWORKS_TOPK_V2_EX] = _map[ANEURALNETWORKS_TOPK_V2];
-
   _map[ANEURALNETWORKS_GATHER] = [](const OperationFactory::Param &init_param, Operands &operands) {
     assert(init_param.input_count == 3 && init_param.output_count == 1);
 
@@ -1492,22 +1255,10 @@ OperationFactory::OperationFactory()
     return new operation::Gather{inputs, outputs, param};
   };
 
-  // ANEURALNETWORKS_GATHER_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_GATHER_EX
-  _map[ANEURALNETWORKS_GATHER_EX] = _map[ANEURALNETWORKS_GATHER];
-
   _map[ANEURALNETWORKS_NEG] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::NEG);
 
-  // ANEURALNETWORKS_NEG_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_NEG_EX
-  _map[ANEURALNETWORKS_NEG_EX] = _map[ANEURALNETWORKS_NEG];
-
   _map[ANEURALNETWORKS_ABS] = getElementwiseUnaryGenerator(operation::ElementwiseUnary::Type::ABS);
 
-  // ANEURALNETWORKS_ABS_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_ABS_EX
-  _map[ANEURALNETWORKS_ABS_EX] = _map[ANEURALNETWORKS_ABS];
-
   _map[ANEURALNETWORKS_ARGMAX] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 2 && init_param.output_count == 1);
 
@@ -1527,10 +1278,6 @@ OperationFactory::OperationFactory()
     return new operation::ArgMinMax{inputs, outputs, param};
   };
 
-  // ANEURALNETWORKS_ARGMAX_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_ARGMAX_EX
-  _map[ANEURALNETWORKS_ARGMAX_EX] = _map[ANEURALNETWORKS_ARGMAX];
-
   _map[ANEURALNETWORKS_ARGMIN] = [](const OperationFactory::Param &init_param, Operands &) {
     assert(init_param.input_count == 2 && init_param.output_count == 1);
 
@@ -1630,10 +1377,6 @@ OperationFactory::OperationFactory()
   _map[ANEURALNETWORKS_REDUCE_MIN] =
     getReduceGenerator(onert::ir::operation::Reduce::ReduceType::MIN);
 
-  // ANEURALNETWORKS_REDUCE_MIN_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_REDUCE_MIN_EX
-  _map[ANEURALNETWORKS_REDUCE_MIN_EX] = _map[ANEURALNETWORKS_REDUCE_MIN];
-
   _map[ANEURALNETWORKS_SPLIT] = [](const OperationFactory::Param &init_param, Operands &operands) {
     assert(init_param.input_count == 3);
     assert(init_param.output_count >= 1); // At least one output tensor and axis
@@ -1668,10 +1411,6 @@ OperationFactory::OperationFactory()
     return new operation::SplitV{inputs, outputs, param};
   };
 
-  // ANEURALNETWORKS_SPLIT_EX is deprecated
-  // TODO Remove ANEURALNETWORKS_SPLIT_EX
-  _map[ANEURALNETWORKS_SPLIT_EX] = _map[ANEURALNETWORKS_SPLIT];
-
   _map[ANEURALNETWORKS_UNPACK_EX] = [](const OperationFactory::Param &init_param,
                                        Operands &operands) {
     assert(init_param.input_count == 3 && init_param.output_count >= 1);
index a3038b718e104650d12cbedce9e0db5c48290d27..3b160473d20625bf8a9226ecbd1bc3bd07595a71 100644 (file)
@@ -64,6 +64,12 @@ struct LoaderDomain
 
 class TFLiteLoader final : public base_loader::BaseLoader<LoaderDomain>
 {
+protected:
+  // Different option name
+  //  Circle: adjoint_lhs, adjoint_rhs
+  //  TFLite: adj_x, adj_y
+  void loadBatchMatMul(const Operator *op, ir::Graph &subg);
+
 public:
   using BaseLoader::BaseLoader;
 
@@ -112,8 +118,40 @@ private:
 
     return subg;
   }
+
+  void loadOperation(const onert_tflite::Operator *op, ir::Graph &subg)
+  {
+    auto const builtin_op = getBuiltinOperator(op);
+
+    switch (builtin_op)
+    {
+      case onert_tflite::BuiltinOperator::BuiltinOperator_BATCH_MATMUL:
+        loadBatchMatMul(op, subg);
+        return;
+      default:
+        BaseLoader::loadOperation(op, subg);
+        return;
+    }
+  }
 };
 
+void TFLiteLoader::loadBatchMatMul(const Operator *op, ir::Graph &subg)
+{
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(op, inputs, outputs);
+
+  ir::operation::BatchMatMul::Param param;
+  const auto *options = op->builtin_options_as_BatchMatMulOptions();
+
+  param.adj_x = options->adj_x();
+  param.adj_y = options->adj_y();
+
+  std::unique_ptr<ir::Operation> new_op(new ir::operation::BatchMatMul(inputs, outputs, param));
+  subg.addOperation(std::move(new_op));
+}
+
 } // namespace
 
 std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
index 8e1b84e295a028b824b572f12221532ed0299e89..cec5bce7468e633a0164e5e00514f9a94150cb3b 100644 (file)
@@ -26,236 +26,396 @@ namespace onert_tflite
 {
 
 struct CustomQuantization;
+struct CustomQuantizationBuilder;
 
 struct QuantizationParameters;
+struct QuantizationParametersBuilder;
 
 struct Int32Vector;
+struct Int32VectorBuilder;
 
 struct Uint16Vector;
+struct Uint16VectorBuilder;
 
 struct Uint8Vector;
+struct Uint8VectorBuilder;
 
 struct DimensionMetadata;
+struct DimensionMetadataBuilder;
 
 struct SparsityParameters;
+struct SparsityParametersBuilder;
 
 struct Tensor;
+struct TensorBuilder;
 
 struct Conv2DOptions;
+struct Conv2DOptionsBuilder;
+
+struct Conv3DOptions;
+struct Conv3DOptionsBuilder;
 
 struct Pool2DOptions;
+struct Pool2DOptionsBuilder;
 
 struct DepthwiseConv2DOptions;
+struct DepthwiseConv2DOptionsBuilder;
 
 struct ConcatEmbeddingsOptions;
+struct ConcatEmbeddingsOptionsBuilder;
 
 struct LSHProjectionOptions;
+struct LSHProjectionOptionsBuilder;
 
 struct SVDFOptions;
+struct SVDFOptionsBuilder;
 
 struct RNNOptions;
+struct RNNOptionsBuilder;
 
 struct SequenceRNNOptions;
+struct SequenceRNNOptionsBuilder;
 
 struct BidirectionalSequenceRNNOptions;
+struct BidirectionalSequenceRNNOptionsBuilder;
 
 struct FullyConnectedOptions;
+struct FullyConnectedOptionsBuilder;
 
 struct SoftmaxOptions;
+struct SoftmaxOptionsBuilder;
 
 struct ConcatenationOptions;
+struct ConcatenationOptionsBuilder;
 
 struct AddOptions;
+struct AddOptionsBuilder;
 
 struct MulOptions;
+struct MulOptionsBuilder;
 
 struct L2NormOptions;
+struct L2NormOptionsBuilder;
 
 struct LocalResponseNormalizationOptions;
+struct LocalResponseNormalizationOptionsBuilder;
 
 struct LSTMOptions;
+struct LSTMOptionsBuilder;
 
 struct UnidirectionalSequenceLSTMOptions;
+struct UnidirectionalSequenceLSTMOptionsBuilder;
 
 struct BidirectionalSequenceLSTMOptions;
+struct BidirectionalSequenceLSTMOptionsBuilder;
 
 struct ResizeBilinearOptions;
+struct ResizeBilinearOptionsBuilder;
 
 struct ResizeNearestNeighborOptions;
+struct ResizeNearestNeighborOptionsBuilder;
 
 struct CallOptions;
+struct CallOptionsBuilder;
 
 struct PadOptions;
+struct PadOptionsBuilder;
 
 struct PadV2Options;
+struct PadV2OptionsBuilder;
 
 struct ReshapeOptions;
+struct ReshapeOptionsBuilder;
 
 struct SpaceToBatchNDOptions;
+struct SpaceToBatchNDOptionsBuilder;
 
 struct BatchToSpaceNDOptions;
+struct BatchToSpaceNDOptionsBuilder;
 
 struct SkipGramOptions;
+struct SkipGramOptionsBuilder;
 
 struct SpaceToDepthOptions;
+struct SpaceToDepthOptionsBuilder;
 
 struct DepthToSpaceOptions;
+struct DepthToSpaceOptionsBuilder;
 
 struct SubOptions;
+struct SubOptionsBuilder;
 
 struct DivOptions;
+struct DivOptionsBuilder;
 
 struct TopKV2Options;
+struct TopKV2OptionsBuilder;
 
 struct EmbeddingLookupSparseOptions;
+struct EmbeddingLookupSparseOptionsBuilder;
 
 struct GatherOptions;
+struct GatherOptionsBuilder;
 
 struct TransposeOptions;
+struct TransposeOptionsBuilder;
 
 struct ExpOptions;
+struct ExpOptionsBuilder;
 
 struct CosOptions;
+struct CosOptionsBuilder;
 
 struct ReducerOptions;
+struct ReducerOptionsBuilder;
 
 struct SqueezeOptions;
+struct SqueezeOptionsBuilder;
 
 struct SplitOptions;
+struct SplitOptionsBuilder;
 
 struct SplitVOptions;
+struct SplitVOptionsBuilder;
 
 struct StridedSliceOptions;
+struct StridedSliceOptionsBuilder;
 
 struct LogSoftmaxOptions;
+struct LogSoftmaxOptionsBuilder;
 
 struct CastOptions;
+struct CastOptionsBuilder;
 
 struct DequantizeOptions;
+struct DequantizeOptionsBuilder;
 
 struct MaximumMinimumOptions;
+struct MaximumMinimumOptionsBuilder;
 
 struct TileOptions;
+struct TileOptionsBuilder;
 
 struct ArgMaxOptions;
+struct ArgMaxOptionsBuilder;
 
 struct ArgMinOptions;
+struct ArgMinOptionsBuilder;
 
 struct GreaterOptions;
+struct GreaterOptionsBuilder;
 
 struct GreaterEqualOptions;
+struct GreaterEqualOptionsBuilder;
 
 struct LessOptions;
+struct LessOptionsBuilder;
 
 struct LessEqualOptions;
+struct LessEqualOptionsBuilder;
 
 struct NegOptions;
+struct NegOptionsBuilder;
 
 struct SelectOptions;
+struct SelectOptionsBuilder;
 
 struct SliceOptions;
+struct SliceOptionsBuilder;
 
 struct TransposeConvOptions;
+struct TransposeConvOptionsBuilder;
 
 struct ExpandDimsOptions;
+struct ExpandDimsOptionsBuilder;
 
 struct SparseToDenseOptions;
+struct SparseToDenseOptionsBuilder;
 
 struct EqualOptions;
+struct EqualOptionsBuilder;
 
 struct NotEqualOptions;
+struct NotEqualOptionsBuilder;
 
 struct ShapeOptions;
+struct ShapeOptionsBuilder;
 
 struct RankOptions;
+struct RankOptionsBuilder;
 
 struct PowOptions;
+struct PowOptionsBuilder;
 
 struct FakeQuantOptions;
+struct FakeQuantOptionsBuilder;
 
 struct PackOptions;
+struct PackOptionsBuilder;
 
 struct LogicalOrOptions;
+struct LogicalOrOptionsBuilder;
 
 struct OneHotOptions;
+struct OneHotOptionsBuilder;
 
 struct AbsOptions;
+struct AbsOptionsBuilder;
 
 struct HardSwishOptions;
+struct HardSwishOptionsBuilder;
 
 struct LogicalAndOptions;
+struct LogicalAndOptionsBuilder;
 
 struct LogicalNotOptions;
+struct LogicalNotOptionsBuilder;
 
 struct UnpackOptions;
+struct UnpackOptionsBuilder;
 
 struct FloorDivOptions;
+struct FloorDivOptionsBuilder;
 
 struct SquareOptions;
+struct SquareOptionsBuilder;
 
 struct ZerosLikeOptions;
+struct ZerosLikeOptionsBuilder;
 
 struct FillOptions;
+struct FillOptionsBuilder;
 
 struct FloorModOptions;
+struct FloorModOptionsBuilder;
 
 struct RangeOptions;
+struct RangeOptionsBuilder;
 
 struct LeakyReluOptions;
+struct LeakyReluOptionsBuilder;
 
 struct SquaredDifferenceOptions;
+struct SquaredDifferenceOptionsBuilder;
 
 struct MirrorPadOptions;
+struct MirrorPadOptionsBuilder;
 
 struct UniqueOptions;
+struct UniqueOptionsBuilder;
 
 struct ReverseV2Options;
+struct ReverseV2OptionsBuilder;
 
 struct AddNOptions;
+struct AddNOptionsBuilder;
 
 struct GatherNdOptions;
+struct GatherNdOptionsBuilder;
 
 struct WhereOptions;
+struct WhereOptionsBuilder;
 
 struct ReverseSequenceOptions;
+struct ReverseSequenceOptionsBuilder;
 
 struct MatrixDiagOptions;
+struct MatrixDiagOptionsBuilder;
 
 struct QuantizeOptions;
+struct QuantizeOptionsBuilder;
 
 struct MatrixSetDiagOptions;
+struct MatrixSetDiagOptionsBuilder;
 
 struct IfOptions;
+struct IfOptionsBuilder;
+
+struct CallOnceOptions;
+struct CallOnceOptionsBuilder;
 
 struct WhileOptions;
+struct WhileOptionsBuilder;
 
 struct NonMaxSuppressionV4Options;
+struct NonMaxSuppressionV4OptionsBuilder;
 
 struct NonMaxSuppressionV5Options;
+struct NonMaxSuppressionV5OptionsBuilder;
 
 struct ScatterNdOptions;
+struct ScatterNdOptionsBuilder;
 
 struct SelectV2Options;
+struct SelectV2OptionsBuilder;
 
 struct DensifyOptions;
+struct DensifyOptionsBuilder;
 
 struct SegmentSumOptions;
+struct SegmentSumOptionsBuilder;
 
 struct BatchMatMulOptions;
+struct BatchMatMulOptionsBuilder;
+
+struct CumsumOptions;
+struct CumsumOptionsBuilder;
+
+struct BroadcastToOptions;
+struct BroadcastToOptionsBuilder;
+
+struct Rfft2dOptions;
+struct Rfft2dOptionsBuilder;
+
+struct HashtableOptions;
+struct HashtableOptionsBuilder;
+
+struct HashtableFindOptions;
+struct HashtableFindOptionsBuilder;
+
+struct HashtableImportOptions;
+struct HashtableImportOptionsBuilder;
+
+struct HashtableSizeOptions;
+struct HashtableSizeOptionsBuilder;
+
+struct VarHandleOptions;
+struct VarHandleOptionsBuilder;
+
+struct ReadVariableOptions;
+struct ReadVariableOptionsBuilder;
+
+struct AssignVariableOptions;
+struct AssignVariableOptionsBuilder;
+
+struct RandomOptions;
+struct RandomOptionsBuilder;
 
 struct OperatorCode;
+struct OperatorCodeBuilder;
 
 struct Operator;
+struct OperatorBuilder;
 
 struct SubGraph;
+struct SubGraphBuilder;
 
 struct Buffer;
+struct BufferBuilder;
 
 struct Metadata;
+struct MetadataBuilder;
+
+struct TensorMap;
+struct TensorMapBuilder;
+
+struct SignatureDef;
+struct SignatureDefBuilder;
 
 struct Model;
+struct ModelBuilder;
 
-enum TensorType
+enum TensorType : int8_t
 {
   TensorType_FLOAT32 = 0,
   TensorType_FLOAT16 = 1,
@@ -268,34 +428,43 @@ enum TensorType
   TensorType_COMPLEX64 = 8,
   TensorType_INT8 = 9,
   TensorType_FLOAT64 = 10,
+  TensorType_COMPLEX128 = 11,
+  TensorType_UINT64 = 12,
+  TensorType_RESOURCE = 13,
+  TensorType_VARIANT = 14,
+  TensorType_UINT32 = 15,
   TensorType_MIN = TensorType_FLOAT32,
-  TensorType_MAX = TensorType_FLOAT64
+  TensorType_MAX = TensorType_UINT32
 };
 
-inline const TensorType (&EnumValuesTensorType())[11]
+inline const TensorType (&EnumValuesTensorType())[16]
 {
-  static const TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32,
-                                      TensorType_UINT8,   TensorType_INT64,   TensorType_STRING,
-                                      TensorType_BOOL,    TensorType_INT16,   TensorType_COMPLEX64,
-                                      TensorType_INT8,    TensorType_FLOAT64};
+  static const TensorType values[] = {
+    TensorType_FLOAT32,   TensorType_FLOAT16,  TensorType_INT32,   TensorType_UINT8,
+    TensorType_INT64,     TensorType_STRING,   TensorType_BOOL,    TensorType_INT16,
+    TensorType_COMPLEX64, TensorType_INT8,     TensorType_FLOAT64, TensorType_COMPLEX128,
+    TensorType_UINT64,    TensorType_RESOURCE, TensorType_VARIANT, TensorType_UINT32};
   return values;
 }
 
 inline const char *const *EnumNamesTensorType()
 {
-  static const char *const names[] = {"FLOAT32",   "FLOAT16", "INT32",   "UINT8",
-                                      "INT64",     "STRING",  "BOOL",    "INT16",
-                                      "COMPLEX64", "INT8",    "FLOAT64", nullptr};
+  static const char *const names[17] = {"FLOAT32", "FLOAT16",    "INT32",  "UINT8",     "INT64",
+                                        "STRING",  "BOOL",       "INT16",  "COMPLEX64", "INT8",
+                                        "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE",  "VARIANT",
+                                        "UINT32",  nullptr};
   return names;
 }
 
 inline const char *EnumNameTensorType(TensorType e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT32))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesTensorType()[index];
 }
 
-enum QuantizationDetails
+enum QuantizationDetails : uint8_t
 {
   QuantizationDetails_NONE = 0,
   QuantizationDetails_CustomQuantization = 1,
@@ -312,13 +481,15 @@ inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
 
 inline const char *const *EnumNamesQuantizationDetails()
 {
-  static const char *const names[] = {"NONE", "CustomQuantization", nullptr};
+  static const char *const names[3] = {"NONE", "CustomQuantization", nullptr};
   return names;
 }
 
 inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, QuantizationDetails_NONE, QuantizationDetails_CustomQuantization))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesQuantizationDetails()[index];
 }
 
@@ -327,7 +498,7 @@ template <typename T> struct QuantizationDetailsTraits
   static const QuantizationDetails enum_value = QuantizationDetails_NONE;
 };
 
-template <> struct QuantizationDetailsTraits<CustomQuantization>
+template <> struct QuantizationDetailsTraits<onert_tflite::CustomQuantization>
 {
   static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
 };
@@ -338,7 +509,7 @@ bool VerifyQuantizationDetailsVector(flatbuffers::Verifier &verifier,
                                      const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
                                      const flatbuffers::Vector<uint8_t> *types);
 
-enum DimensionType
+enum DimensionType : int8_t
 {
   DimensionType_DENSE = 0,
   DimensionType_SPARSE_CSR = 1,
@@ -354,17 +525,19 @@ inline const DimensionType (&EnumValuesDimensionType())[2]
 
 inline const char *const *EnumNamesDimensionType()
 {
-  static const char *const names[] = {"DENSE", "SPARSE_CSR", nullptr};
+  static const char *const names[3] = {"DENSE", "SPARSE_CSR", nullptr};
   return names;
 }
 
 inline const char *EnumNameDimensionType(DimensionType e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, DimensionType_DENSE, DimensionType_SPARSE_CSR))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesDimensionType()[index];
 }
 
-enum SparseIndexVector
+enum SparseIndexVector : uint8_t
 {
   SparseIndexVector_NONE = 0,
   SparseIndexVector_Int32Vector = 1,
@@ -384,14 +557,16 @@ inline const SparseIndexVector (&EnumValuesSparseIndexVector())[4]
 
 inline const char *const *EnumNamesSparseIndexVector()
 {
-  static const char *const names[] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
-                                      nullptr};
+  static const char *const names[5] = {"NONE", "Int32Vector", "Uint16Vector", "Uint8Vector",
+                                       nullptr};
   return names;
 }
 
 inline const char *EnumNameSparseIndexVector(SparseIndexVector e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, SparseIndexVector_NONE, SparseIndexVector_Uint8Vector))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesSparseIndexVector()[index];
 }
 
@@ -400,17 +575,17 @@ template <typename T> struct SparseIndexVectorTraits
   static const SparseIndexVector enum_value = SparseIndexVector_NONE;
 };
 
-template <> struct SparseIndexVectorTraits<Int32Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Int32Vector>
 {
   static const SparseIndexVector enum_value = SparseIndexVector_Int32Vector;
 };
 
-template <> struct SparseIndexVectorTraits<Uint16Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Uint16Vector>
 {
   static const SparseIndexVector enum_value = SparseIndexVector_Uint16Vector;
 };
 
-template <> struct SparseIndexVectorTraits<Uint8Vector>
+template <> struct SparseIndexVectorTraits<onert_tflite::Uint8Vector>
 {
   static const SparseIndexVector enum_value = SparseIndexVector_Uint8Vector;
 };
@@ -421,7 +596,7 @@ bool VerifySparseIndexVectorVector(flatbuffers::Verifier &verifier,
                                    const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
                                    const flatbuffers::Vector<uint8_t> *types);
 
-enum BuiltinOperator
+enum BuiltinOperator : int32_t
 {
   BuiltinOperator_ADD = 0,
   BuiltinOperator_AVERAGE_POOL_2D = 1,
@@ -550,11 +725,31 @@ enum BuiltinOperator
   BuiltinOperator_DENSIFY = 124,
   BuiltinOperator_SEGMENT_SUM = 125,
   BuiltinOperator_BATCH_MATMUL = 126,
+  BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+  BuiltinOperator_CUMSUM = 128,
+  BuiltinOperator_CALL_ONCE = 129,
+  BuiltinOperator_BROADCAST_TO = 130,
+  BuiltinOperator_RFFT2D = 131,
+  BuiltinOperator_CONV_3D = 132,
+  BuiltinOperator_IMAG = 133,
+  BuiltinOperator_REAL = 134,
+  BuiltinOperator_COMPLEX_ABS = 135,
+  BuiltinOperator_HASHTABLE = 136,
+  BuiltinOperator_HASHTABLE_FIND = 137,
+  BuiltinOperator_HASHTABLE_IMPORT = 138,
+  BuiltinOperator_HASHTABLE_SIZE = 139,
+  BuiltinOperator_REDUCE_ALL = 140,
+  BuiltinOperator_CONV_3D_TRANSPOSE = 141,
+  BuiltinOperator_VAR_HANDLE = 142,
+  BuiltinOperator_READ_VARIABLE = 143,
+  BuiltinOperator_ASSIGN_VARIABLE = 144,
+  BuiltinOperator_BROADCAST_ARGS = 145,
+  BuiltinOperator_RANDOM_STANDARD_NORMAL = 146,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL
+  BuiltinOperator_MAX = BuiltinOperator_RANDOM_STANDARD_NORMAL
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[147]
 {
   static const BuiltinOperator values[] = {BuiltinOperator_ADD,
                                            BuiltinOperator_AVERAGE_POOL_2D,
@@ -682,150 +877,192 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127]
                                            BuiltinOperator_SELECT_V2,
                                            BuiltinOperator_DENSIFY,
                                            BuiltinOperator_SEGMENT_SUM,
-                                           BuiltinOperator_BATCH_MATMUL};
+                                           BuiltinOperator_BATCH_MATMUL,
+                                           BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES,
+                                           BuiltinOperator_CUMSUM,
+                                           BuiltinOperator_CALL_ONCE,
+                                           BuiltinOperator_BROADCAST_TO,
+                                           BuiltinOperator_RFFT2D,
+                                           BuiltinOperator_CONV_3D,
+                                           BuiltinOperator_IMAG,
+                                           BuiltinOperator_REAL,
+                                           BuiltinOperator_COMPLEX_ABS,
+                                           BuiltinOperator_HASHTABLE,
+                                           BuiltinOperator_HASHTABLE_FIND,
+                                           BuiltinOperator_HASHTABLE_IMPORT,
+                                           BuiltinOperator_HASHTABLE_SIZE,
+                                           BuiltinOperator_REDUCE_ALL,
+                                           BuiltinOperator_CONV_3D_TRANSPOSE,
+                                           BuiltinOperator_VAR_HANDLE,
+                                           BuiltinOperator_READ_VARIABLE,
+                                           BuiltinOperator_ASSIGN_VARIABLE,
+                                           BuiltinOperator_BROADCAST_ARGS,
+                                           BuiltinOperator_RANDOM_STANDARD_NORMAL};
   return values;
 }
 
 inline const char *const *EnumNamesBuiltinOperator()
 {
-  static const char *const names[] = {"ADD",
-                                      "AVERAGE_POOL_2D",
-                                      "CONCATENATION",
-                                      "CONV_2D",
-                                      "DEPTHWISE_CONV_2D",
-                                      "DEPTH_TO_SPACE",
-                                      "DEQUANTIZE",
-                                      "EMBEDDING_LOOKUP",
-                                      "FLOOR",
-                                      "FULLY_CONNECTED",
-                                      "HASHTABLE_LOOKUP",
-                                      "L2_NORMALIZATION",
-                                      "L2_POOL_2D",
-                                      "LOCAL_RESPONSE_NORMALIZATION",
-                                      "LOGISTIC",
-                                      "LSH_PROJECTION",
-                                      "LSTM",
-                                      "MAX_POOL_2D",
-                                      "MUL",
-                                      "RELU",
-                                      "RELU_N1_TO_1",
-                                      "RELU6",
-                                      "RESHAPE",
-                                      "RESIZE_BILINEAR",
-                                      "RNN",
-                                      "SOFTMAX",
-                                      "SPACE_TO_DEPTH",
-                                      "SVDF",
-                                      "TANH",
-                                      "CONCAT_EMBEDDINGS",
-                                      "SKIP_GRAM",
-                                      "CALL",
-                                      "CUSTOM",
-                                      "EMBEDDING_LOOKUP_SPARSE",
-                                      "PAD",
-                                      "UNIDIRECTIONAL_SEQUENCE_RNN",
-                                      "GATHER",
-                                      "BATCH_TO_SPACE_ND",
-                                      "SPACE_TO_BATCH_ND",
-                                      "TRANSPOSE",
-                                      "MEAN",
-                                      "SUB",
-                                      "DIV",
-                                      "SQUEEZE",
-                                      "UNIDIRECTIONAL_SEQUENCE_LSTM",
-                                      "STRIDED_SLICE",
-                                      "BIDIRECTIONAL_SEQUENCE_RNN",
-                                      "EXP",
-                                      "TOPK_V2",
-                                      "SPLIT",
-                                      "LOG_SOFTMAX",
-                                      "DELEGATE",
-                                      "BIDIRECTIONAL_SEQUENCE_LSTM",
-                                      "CAST",
-                                      "PRELU",
-                                      "MAXIMUM",
-                                      "ARG_MAX",
-                                      "MINIMUM",
-                                      "LESS",
-                                      "NEG",
-                                      "PADV2",
-                                      "GREATER",
-                                      "GREATER_EQUAL",
-                                      "LESS_EQUAL",
-                                      "SELECT",
-                                      "SLICE",
-                                      "SIN",
-                                      "TRANSPOSE_CONV",
-                                      "SPARSE_TO_DENSE",
-                                      "TILE",
-                                      "EXPAND_DIMS",
-                                      "EQUAL",
-                                      "NOT_EQUAL",
-                                      "LOG",
-                                      "SUM",
-                                      "SQRT",
-                                      "RSQRT",
-                                      "SHAPE",
-                                      "POW",
-                                      "ARG_MIN",
-                                      "FAKE_QUANT",
-                                      "REDUCE_PROD",
-                                      "REDUCE_MAX",
-                                      "PACK",
-                                      "LOGICAL_OR",
-                                      "ONE_HOT",
-                                      "LOGICAL_AND",
-                                      "LOGICAL_NOT",
-                                      "UNPACK",
-                                      "REDUCE_MIN",
-                                      "FLOOR_DIV",
-                                      "REDUCE_ANY",
-                                      "SQUARE",
-                                      "ZEROS_LIKE",
-                                      "FILL",
-                                      "FLOOR_MOD",
-                                      "RANGE",
-                                      "RESIZE_NEAREST_NEIGHBOR",
-                                      "LEAKY_RELU",
-                                      "SQUARED_DIFFERENCE",
-                                      "MIRROR_PAD",
-                                      "ABS",
-                                      "SPLIT_V",
-                                      "UNIQUE",
-                                      "CEIL",
-                                      "REVERSE_V2",
-                                      "ADD_N",
-                                      "GATHER_ND",
-                                      "COS",
-                                      "WHERE",
-                                      "RANK",
-                                      "ELU",
-                                      "REVERSE_SEQUENCE",
-                                      "MATRIX_DIAG",
-                                      "QUANTIZE",
-                                      "MATRIX_SET_DIAG",
-                                      "ROUND",
-                                      "HARD_SWISH",
-                                      "IF",
-                                      "WHILE",
-                                      "NON_MAX_SUPPRESSION_V4",
-                                      "NON_MAX_SUPPRESSION_V5",
-                                      "SCATTER_ND",
-                                      "SELECT_V2",
-                                      "DENSIFY",
-                                      "SEGMENT_SUM",
-                                      "BATCH_MATMUL",
-                                      nullptr};
+  static const char *const names[148] = {"ADD",
+                                         "AVERAGE_POOL_2D",
+                                         "CONCATENATION",
+                                         "CONV_2D",
+                                         "DEPTHWISE_CONV_2D",
+                                         "DEPTH_TO_SPACE",
+                                         "DEQUANTIZE",
+                                         "EMBEDDING_LOOKUP",
+                                         "FLOOR",
+                                         "FULLY_CONNECTED",
+                                         "HASHTABLE_LOOKUP",
+                                         "L2_NORMALIZATION",
+                                         "L2_POOL_2D",
+                                         "LOCAL_RESPONSE_NORMALIZATION",
+                                         "LOGISTIC",
+                                         "LSH_PROJECTION",
+                                         "LSTM",
+                                         "MAX_POOL_2D",
+                                         "MUL",
+                                         "RELU",
+                                         "RELU_N1_TO_1",
+                                         "RELU6",
+                                         "RESHAPE",
+                                         "RESIZE_BILINEAR",
+                                         "RNN",
+                                         "SOFTMAX",
+                                         "SPACE_TO_DEPTH",
+                                         "SVDF",
+                                         "TANH",
+                                         "CONCAT_EMBEDDINGS",
+                                         "SKIP_GRAM",
+                                         "CALL",
+                                         "CUSTOM",
+                                         "EMBEDDING_LOOKUP_SPARSE",
+                                         "PAD",
+                                         "UNIDIRECTIONAL_SEQUENCE_RNN",
+                                         "GATHER",
+                                         "BATCH_TO_SPACE_ND",
+                                         "SPACE_TO_BATCH_ND",
+                                         "TRANSPOSE",
+                                         "MEAN",
+                                         "SUB",
+                                         "DIV",
+                                         "SQUEEZE",
+                                         "UNIDIRECTIONAL_SEQUENCE_LSTM",
+                                         "STRIDED_SLICE",
+                                         "BIDIRECTIONAL_SEQUENCE_RNN",
+                                         "EXP",
+                                         "TOPK_V2",
+                                         "SPLIT",
+                                         "LOG_SOFTMAX",
+                                         "DELEGATE",
+                                         "BIDIRECTIONAL_SEQUENCE_LSTM",
+                                         "CAST",
+                                         "PRELU",
+                                         "MAXIMUM",
+                                         "ARG_MAX",
+                                         "MINIMUM",
+                                         "LESS",
+                                         "NEG",
+                                         "PADV2",
+                                         "GREATER",
+                                         "GREATER_EQUAL",
+                                         "LESS_EQUAL",
+                                         "SELECT",
+                                         "SLICE",
+                                         "SIN",
+                                         "TRANSPOSE_CONV",
+                                         "SPARSE_TO_DENSE",
+                                         "TILE",
+                                         "EXPAND_DIMS",
+                                         "EQUAL",
+                                         "NOT_EQUAL",
+                                         "LOG",
+                                         "SUM",
+                                         "SQRT",
+                                         "RSQRT",
+                                         "SHAPE",
+                                         "POW",
+                                         "ARG_MIN",
+                                         "FAKE_QUANT",
+                                         "REDUCE_PROD",
+                                         "REDUCE_MAX",
+                                         "PACK",
+                                         "LOGICAL_OR",
+                                         "ONE_HOT",
+                                         "LOGICAL_AND",
+                                         "LOGICAL_NOT",
+                                         "UNPACK",
+                                         "REDUCE_MIN",
+                                         "FLOOR_DIV",
+                                         "REDUCE_ANY",
+                                         "SQUARE",
+                                         "ZEROS_LIKE",
+                                         "FILL",
+                                         "FLOOR_MOD",
+                                         "RANGE",
+                                         "RESIZE_NEAREST_NEIGHBOR",
+                                         "LEAKY_RELU",
+                                         "SQUARED_DIFFERENCE",
+                                         "MIRROR_PAD",
+                                         "ABS",
+                                         "SPLIT_V",
+                                         "UNIQUE",
+                                         "CEIL",
+                                         "REVERSE_V2",
+                                         "ADD_N",
+                                         "GATHER_ND",
+                                         "COS",
+                                         "WHERE",
+                                         "RANK",
+                                         "ELU",
+                                         "REVERSE_SEQUENCE",
+                                         "MATRIX_DIAG",
+                                         "QUANTIZE",
+                                         "MATRIX_SET_DIAG",
+                                         "ROUND",
+                                         "HARD_SWISH",
+                                         "IF",
+                                         "WHILE",
+                                         "NON_MAX_SUPPRESSION_V4",
+                                         "NON_MAX_SUPPRESSION_V5",
+                                         "SCATTER_ND",
+                                         "SELECT_V2",
+                                         "DENSIFY",
+                                         "SEGMENT_SUM",
+                                         "BATCH_MATMUL",
+                                         "PLACEHOLDER_FOR_GREATER_OP_CODES",
+                                         "CUMSUM",
+                                         "CALL_ONCE",
+                                         "BROADCAST_TO",
+                                         "RFFT2D",
+                                         "CONV_3D",
+                                         "IMAG",
+                                         "REAL",
+                                         "COMPLEX_ABS",
+                                         "HASHTABLE",
+                                         "HASHTABLE_FIND",
+                                         "HASHTABLE_IMPORT",
+                                         "HASHTABLE_SIZE",
+                                         "REDUCE_ALL",
+                                         "CONV_3D_TRANSPOSE",
+                                         "VAR_HANDLE",
+                                         "READ_VARIABLE",
+                                         "ASSIGN_VARIABLE",
+                                         "BROADCAST_ARGS",
+                                         "RANDOM_STANDARD_NORMAL",
+                                         nullptr};
   return names;
 }
 
 inline const char *EnumNameBuiltinOperator(BuiltinOperator e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_RANDOM_STANDARD_NORMAL))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesBuiltinOperator()[index];
 }
 
-enum BuiltinOptions
+enum BuiltinOptions : uint8_t
 {
   BuiltinOptions_NONE = 0,
   BuiltinOptions_Conv2DOptions = 1,
@@ -929,11 +1166,24 @@ enum BuiltinOptions
   BuiltinOptions_DensifyOptions = 99,
   BuiltinOptions_SegmentSumOptions = 100,
   BuiltinOptions_BatchMatMulOptions = 101,
+  BuiltinOptions_CumsumOptions = 102,
+  BuiltinOptions_CallOnceOptions = 103,
+  BuiltinOptions_BroadcastToOptions = 104,
+  BuiltinOptions_Rfft2dOptions = 105,
+  BuiltinOptions_Conv3DOptions = 106,
+  BuiltinOptions_HashtableOptions = 107,
+  BuiltinOptions_HashtableFindOptions = 108,
+  BuiltinOptions_HashtableImportOptions = 109,
+  BuiltinOptions_HashtableSizeOptions = 110,
+  BuiltinOptions_VarHandleOptions = 111,
+  BuiltinOptions_ReadVariableOptions = 112,
+  BuiltinOptions_AssignVariableOptions = 113,
+  BuiltinOptions_RandomOptions = 114,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions
+  BuiltinOptions_MAX = BuiltinOptions_RandomOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115]
 {
   static const BuiltinOptions values[] = {BuiltinOptions_NONE,
                                           BuiltinOptions_Conv2DOptions,
@@ -1036,121 +1286,149 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102]
                                           BuiltinOptions_SelectV2Options,
                                           BuiltinOptions_DensifyOptions,
                                           BuiltinOptions_SegmentSumOptions,
-                                          BuiltinOptions_BatchMatMulOptions};
+                                          BuiltinOptions_BatchMatMulOptions,
+                                          BuiltinOptions_CumsumOptions,
+                                          BuiltinOptions_CallOnceOptions,
+                                          BuiltinOptions_BroadcastToOptions,
+                                          BuiltinOptions_Rfft2dOptions,
+                                          BuiltinOptions_Conv3DOptions,
+                                          BuiltinOptions_HashtableOptions,
+                                          BuiltinOptions_HashtableFindOptions,
+                                          BuiltinOptions_HashtableImportOptions,
+                                          BuiltinOptions_HashtableSizeOptions,
+                                          BuiltinOptions_VarHandleOptions,
+                                          BuiltinOptions_ReadVariableOptions,
+                                          BuiltinOptions_AssignVariableOptions,
+                                          BuiltinOptions_RandomOptions};
   return values;
 }
 
 inline const char *const *EnumNamesBuiltinOptions()
 {
-  static const char *const names[] = {"NONE",
-                                      "Conv2DOptions",
-                                      "DepthwiseConv2DOptions",
-                                      "ConcatEmbeddingsOptions",
-                                      "LSHProjectionOptions",
-                                      "Pool2DOptions",
-                                      "SVDFOptions",
-                                      "RNNOptions",
-                                      "FullyConnectedOptions",
-                                      "SoftmaxOptions",
-                                      "ConcatenationOptions",
-                                      "AddOptions",
-                                      "L2NormOptions",
-                                      "LocalResponseNormalizationOptions",
-                                      "LSTMOptions",
-                                      "ResizeBilinearOptions",
-                                      "CallOptions",
-                                      "ReshapeOptions",
-                                      "SkipGramOptions",
-                                      "SpaceToDepthOptions",
-                                      "EmbeddingLookupSparseOptions",
-                                      "MulOptions",
-                                      "PadOptions",
-                                      "GatherOptions",
-                                      "BatchToSpaceNDOptions",
-                                      "SpaceToBatchNDOptions",
-                                      "TransposeOptions",
-                                      "ReducerOptions",
-                                      "SubOptions",
-                                      "DivOptions",
-                                      "SqueezeOptions",
-                                      "SequenceRNNOptions",
-                                      "StridedSliceOptions",
-                                      "ExpOptions",
-                                      "TopKV2Options",
-                                      "SplitOptions",
-                                      "LogSoftmaxOptions",
-                                      "CastOptions",
-                                      "DequantizeOptions",
-                                      "MaximumMinimumOptions",
-                                      "ArgMaxOptions",
-                                      "LessOptions",
-                                      "NegOptions",
-                                      "PadV2Options",
-                                      "GreaterOptions",
-                                      "GreaterEqualOptions",
-                                      "LessEqualOptions",
-                                      "SelectOptions",
-                                      "SliceOptions",
-                                      "TransposeConvOptions",
-                                      "SparseToDenseOptions",
-                                      "TileOptions",
-                                      "ExpandDimsOptions",
-                                      "EqualOptions",
-                                      "NotEqualOptions",
-                                      "ShapeOptions",
-                                      "PowOptions",
-                                      "ArgMinOptions",
-                                      "FakeQuantOptions",
-                                      "PackOptions",
-                                      "LogicalOrOptions",
-                                      "OneHotOptions",
-                                      "LogicalAndOptions",
-                                      "LogicalNotOptions",
-                                      "UnpackOptions",
-                                      "FloorDivOptions",
-                                      "SquareOptions",
-                                      "ZerosLikeOptions",
-                                      "FillOptions",
-                                      "BidirectionalSequenceLSTMOptions",
-                                      "BidirectionalSequenceRNNOptions",
-                                      "UnidirectionalSequenceLSTMOptions",
-                                      "FloorModOptions",
-                                      "RangeOptions",
-                                      "ResizeNearestNeighborOptions",
-                                      "LeakyReluOptions",
-                                      "SquaredDifferenceOptions",
-                                      "MirrorPadOptions",
-                                      "AbsOptions",
-                                      "SplitVOptions",
-                                      "UniqueOptions",
-                                      "ReverseV2Options",
-                                      "AddNOptions",
-                                      "GatherNdOptions",
-                                      "CosOptions",
-                                      "WhereOptions",
-                                      "RankOptions",
-                                      "ReverseSequenceOptions",
-                                      "MatrixDiagOptions",
-                                      "QuantizeOptions",
-                                      "MatrixSetDiagOptions",
-                                      "HardSwishOptions",
-                                      "IfOptions",
-                                      "WhileOptions",
-                                      "DepthToSpaceOptions",
-                                      "NonMaxSuppressionV4Options",
-                                      "NonMaxSuppressionV5Options",
-                                      "ScatterNdOptions",
-                                      "SelectV2Options",
-                                      "DensifyOptions",
-                                      "SegmentSumOptions",
-                                      "BatchMatMulOptions",
-                                      nullptr};
+  static const char *const names[116] = {"NONE",
+                                         "Conv2DOptions",
+                                         "DepthwiseConv2DOptions",
+                                         "ConcatEmbeddingsOptions",
+                                         "LSHProjectionOptions",
+                                         "Pool2DOptions",
+                                         "SVDFOptions",
+                                         "RNNOptions",
+                                         "FullyConnectedOptions",
+                                         "SoftmaxOptions",
+                                         "ConcatenationOptions",
+                                         "AddOptions",
+                                         "L2NormOptions",
+                                         "LocalResponseNormalizationOptions",
+                                         "LSTMOptions",
+                                         "ResizeBilinearOptions",
+                                         "CallOptions",
+                                         "ReshapeOptions",
+                                         "SkipGramOptions",
+                                         "SpaceToDepthOptions",
+                                         "EmbeddingLookupSparseOptions",
+                                         "MulOptions",
+                                         "PadOptions",
+                                         "GatherOptions",
+                                         "BatchToSpaceNDOptions",
+                                         "SpaceToBatchNDOptions",
+                                         "TransposeOptions",
+                                         "ReducerOptions",
+                                         "SubOptions",
+                                         "DivOptions",
+                                         "SqueezeOptions",
+                                         "SequenceRNNOptions",
+                                         "StridedSliceOptions",
+                                         "ExpOptions",
+                                         "TopKV2Options",
+                                         "SplitOptions",
+                                         "LogSoftmaxOptions",
+                                         "CastOptions",
+                                         "DequantizeOptions",
+                                         "MaximumMinimumOptions",
+                                         "ArgMaxOptions",
+                                         "LessOptions",
+                                         "NegOptions",
+                                         "PadV2Options",
+                                         "GreaterOptions",
+                                         "GreaterEqualOptions",
+                                         "LessEqualOptions",
+                                         "SelectOptions",
+                                         "SliceOptions",
+                                         "TransposeConvOptions",
+                                         "SparseToDenseOptions",
+                                         "TileOptions",
+                                         "ExpandDimsOptions",
+                                         "EqualOptions",
+                                         "NotEqualOptions",
+                                         "ShapeOptions",
+                                         "PowOptions",
+                                         "ArgMinOptions",
+                                         "FakeQuantOptions",
+                                         "PackOptions",
+                                         "LogicalOrOptions",
+                                         "OneHotOptions",
+                                         "LogicalAndOptions",
+                                         "LogicalNotOptions",
+                                         "UnpackOptions",
+                                         "FloorDivOptions",
+                                         "SquareOptions",
+                                         "ZerosLikeOptions",
+                                         "FillOptions",
+                                         "BidirectionalSequenceLSTMOptions",
+                                         "BidirectionalSequenceRNNOptions",
+                                         "UnidirectionalSequenceLSTMOptions",
+                                         "FloorModOptions",
+                                         "RangeOptions",
+                                         "ResizeNearestNeighborOptions",
+                                         "LeakyReluOptions",
+                                         "SquaredDifferenceOptions",
+                                         "MirrorPadOptions",
+                                         "AbsOptions",
+                                         "SplitVOptions",
+                                         "UniqueOptions",
+                                         "ReverseV2Options",
+                                         "AddNOptions",
+                                         "GatherNdOptions",
+                                         "CosOptions",
+                                         "WhereOptions",
+                                         "RankOptions",
+                                         "ReverseSequenceOptions",
+                                         "MatrixDiagOptions",
+                                         "QuantizeOptions",
+                                         "MatrixSetDiagOptions",
+                                         "HardSwishOptions",
+                                         "IfOptions",
+                                         "WhileOptions",
+                                         "DepthToSpaceOptions",
+                                         "NonMaxSuppressionV4Options",
+                                         "NonMaxSuppressionV5Options",
+                                         "ScatterNdOptions",
+                                         "SelectV2Options",
+                                         "DensifyOptions",
+                                         "SegmentSumOptions",
+                                         "BatchMatMulOptions",
+                                         "CumsumOptions",
+                                         "CallOnceOptions",
+                                         "BroadcastToOptions",
+                                         "Rfft2dOptions",
+                                         "Conv3DOptions",
+                                         "HashtableOptions",
+                                         "HashtableFindOptions",
+                                         "HashtableImportOptions",
+                                         "HashtableSizeOptions",
+                                         "VarHandleOptions",
+                                         "ReadVariableOptions",
+                                         "AssignVariableOptions",
+                                         "RandomOptions",
+                                         nullptr};
   return names;
 }
 
 inline const char *EnumNameBuiltinOptions(BuiltinOptions e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_RandomOptions))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesBuiltinOptions()[index];
 }
 
@@ -1159,517 +1437,582 @@ template <typename T> struct BuiltinOptionsTraits
   static const BuiltinOptions enum_value = BuiltinOptions_NONE;
 };
 
-template <> struct BuiltinOptionsTraits<Conv2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::Conv2DOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions;
 };
 
-template <> struct BuiltinOptionsTraits<DepthwiseConv2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DepthwiseConv2DOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ConcatEmbeddingsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ConcatEmbeddingsOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LSHProjectionOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LSHProjectionOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions;
 };
 
-template <> struct BuiltinOptionsTraits<Pool2DOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::Pool2DOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SVDFOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SVDFOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions;
 };
 
-template <> struct BuiltinOptionsTraits<RNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RNNOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions;
 };
 
-template <> struct BuiltinOptionsTraits<FullyConnectedOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FullyConnectedOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SoftmaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SoftmaxOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ConcatenationOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ConcatenationOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions;
 };
 
-template <> struct BuiltinOptionsTraits<AddOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AddOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_AddOptions;
 };
 
-template <> struct BuiltinOptionsTraits<L2NormOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::L2NormOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LocalResponseNormalizationOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LocalResponseNormalizationOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LSTMOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ResizeBilinearOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeBilinearOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions;
 };
 
-template <> struct BuiltinOptionsTraits<CallOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CallOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_CallOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ReshapeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReshapeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SkipGramOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SkipGramOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SpaceToDepthOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToDepthOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions;
 };
 
-template <> struct BuiltinOptionsTraits<EmbeddingLookupSparseOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::EmbeddingLookupSparseOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions;
 };
 
-template <> struct BuiltinOptionsTraits<MulOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MulOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_MulOptions;
 };
 
-template <> struct BuiltinOptionsTraits<PadOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PadOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_PadOptions;
 };
 
-template <> struct BuiltinOptionsTraits<GatherOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GatherOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions;
 };
 
-template <> struct BuiltinOptionsTraits<BatchToSpaceNDOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BatchToSpaceNDOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SpaceToBatchNDOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SpaceToBatchNDOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions;
 };
 
-template <> struct BuiltinOptionsTraits<TransposeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TransposeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ReducerOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReducerOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ReducerOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SubOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SubOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SubOptions;
 };
 
-template <> struct BuiltinOptionsTraits<DivOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DivOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_DivOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SqueezeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SqueezeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SequenceRNNOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions;
 };
 
-template <> struct BuiltinOptionsTraits<StridedSliceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::StridedSliceOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ExpOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ExpOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions;
 };
 
-template <> struct BuiltinOptionsTraits<TopKV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::TopKV2Options>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options;
 };
 
-template <> struct BuiltinOptionsTraits<SplitOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SplitOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LogSoftmaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogSoftmaxOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions;
 };
 
-template <> struct BuiltinOptionsTraits<CastOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CastOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_CastOptions;
 };
 
-template <> struct BuiltinOptionsTraits<DequantizeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DequantizeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<MaximumMinimumOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MaximumMinimumOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_MaximumMinimumOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ArgMaxOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ArgMaxOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ArgMaxOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LessOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LessOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LessOptions;
 };
 
-template <> struct BuiltinOptionsTraits<NegOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::NegOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_NegOptions;
 };
 
-template <> struct BuiltinOptionsTraits<PadV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::PadV2Options>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_PadV2Options;
 };
 
-template <> struct BuiltinOptionsTraits<GreaterOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GreaterOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_GreaterOptions;
 };
 
-template <> struct BuiltinOptionsTraits<GreaterEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GreaterEqualOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_GreaterEqualOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LessEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LessEqualOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LessEqualOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SelectOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SelectOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SelectOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SliceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SliceOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SliceOptions;
 };
 
-template <> struct BuiltinOptionsTraits<TransposeConvOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TransposeConvOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_TransposeConvOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SparseToDenseOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SparseToDenseOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SparseToDenseOptions;
 };
 
-template <> struct BuiltinOptionsTraits<TileOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::TileOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_TileOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ExpandDimsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ExpandDimsOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ExpandDimsOptions;
 };
 
-template <> struct BuiltinOptionsTraits<EqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::EqualOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_EqualOptions;
 };
 
-template <> struct BuiltinOptionsTraits<NotEqualOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::NotEqualOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_NotEqualOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ShapeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ShapeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ShapeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<PowOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PowOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_PowOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ArgMinOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ArgMinOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ArgMinOptions;
 };
 
-template <> struct BuiltinOptionsTraits<FakeQuantOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FakeQuantOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_FakeQuantOptions;
 };
 
-template <> struct BuiltinOptionsTraits<PackOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::PackOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_PackOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LogicalOrOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalOrOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LogicalOrOptions;
 };
 
-template <> struct BuiltinOptionsTraits<OneHotOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::OneHotOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_OneHotOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LogicalAndOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalAndOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LogicalAndOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LogicalNotOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LogicalNotOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LogicalNotOptions;
 };
 
-template <> struct BuiltinOptionsTraits<UnpackOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UnpackOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_UnpackOptions;
 };
 
-template <> struct BuiltinOptionsTraits<FloorDivOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FloorDivOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_FloorDivOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SquareOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SquareOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SquareOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ZerosLikeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ZerosLikeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ZerosLikeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<FillOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FillOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_FillOptions;
 };
 
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceLSTMOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceLSTMOptions;
 };
 
-template <> struct BuiltinOptionsTraits<BidirectionalSequenceRNNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BidirectionalSequenceRNNOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions;
 };
 
-template <> struct BuiltinOptionsTraits<UnidirectionalSequenceLSTMOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UnidirectionalSequenceLSTMOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions;
 };
 
-template <> struct BuiltinOptionsTraits<FloorModOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::FloorModOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions;
 };
 
-template <> struct BuiltinOptionsTraits<RangeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RangeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ResizeNearestNeighborOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ResizeNearestNeighborOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ResizeNearestNeighborOptions;
 };
 
-template <> struct BuiltinOptionsTraits<LeakyReluOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::LeakyReluOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_LeakyReluOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SquaredDifferenceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SquaredDifferenceOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SquaredDifferenceOptions;
 };
 
-template <> struct BuiltinOptionsTraits<MirrorPadOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MirrorPadOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_MirrorPadOptions;
 };
 
-template <> struct BuiltinOptionsTraits<AbsOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AbsOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_AbsOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SplitVOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SplitVOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SplitVOptions;
 };
 
-template <> struct BuiltinOptionsTraits<UniqueOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::UniqueOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_UniqueOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ReverseV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReverseV2Options>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ReverseV2Options;
 };
 
-template <> struct BuiltinOptionsTraits<AddNOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::AddNOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_AddNOptions;
 };
 
-template <> struct BuiltinOptionsTraits<GatherNdOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::GatherNdOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_GatherNdOptions;
 };
 
-template <> struct BuiltinOptionsTraits<CosOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::CosOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_CosOptions;
 };
 
-template <> struct BuiltinOptionsTraits<WhereOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::WhereOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions;
 };
 
-template <> struct BuiltinOptionsTraits<RankOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::RankOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_RankOptions;
 };
 
-template <> struct BuiltinOptionsTraits<ReverseSequenceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ReverseSequenceOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ReverseSequenceOptions;
 };
 
-template <> struct BuiltinOptionsTraits<MatrixDiagOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MatrixDiagOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_MatrixDiagOptions;
 };
 
-template <> struct BuiltinOptionsTraits<QuantizeOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::QuantizeOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_QuantizeOptions;
 };
 
-template <> struct BuiltinOptionsTraits<MatrixSetDiagOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::MatrixSetDiagOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_MatrixSetDiagOptions;
 };
 
-template <> struct BuiltinOptionsTraits<HardSwishOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::HardSwishOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_HardSwishOptions;
 };
 
-template <> struct BuiltinOptionsTraits<IfOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::IfOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_IfOptions;
 };
 
-template <> struct BuiltinOptionsTraits<WhileOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::WhileOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_WhileOptions;
 };
 
-template <> struct BuiltinOptionsTraits<DepthToSpaceOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DepthToSpaceOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_DepthToSpaceOptions;
 };
 
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV4Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV4Options>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV4Options;
 };
 
-template <> struct BuiltinOptionsTraits<NonMaxSuppressionV5Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::NonMaxSuppressionV5Options>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_NonMaxSuppressionV5Options;
 };
 
-template <> struct BuiltinOptionsTraits<ScatterNdOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::ScatterNdOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_ScatterNdOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SelectV2Options>
+template <> struct BuiltinOptionsTraits<onert_tflite::SelectV2Options>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SelectV2Options;
 };
 
-template <> struct BuiltinOptionsTraits<DensifyOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::DensifyOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_DensifyOptions;
 };
 
-template <> struct BuiltinOptionsTraits<SegmentSumOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::SegmentSumOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_SegmentSumOptions;
 };
 
-template <> struct BuiltinOptionsTraits<BatchMatMulOptions>
+template <> struct BuiltinOptionsTraits<onert_tflite::BatchMatMulOptions>
 {
   static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions;
 };
 
+template <> struct BuiltinOptionsTraits<onert_tflite::CumsumOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_CumsumOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::CallOnceOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_CallOnceOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::BroadcastToOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::Rfft2dOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_Rfft2dOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::Conv3DOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_Conv3DOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableFindOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableFindOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableImportOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableImportOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::HashtableSizeOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_HashtableSizeOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::VarHandleOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_VarHandleOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::ReadVariableOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_ReadVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::AssignVariableOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
+};
+
+template <> struct BuiltinOptionsTraits<onert_tflite::RandomOptions>
+{
+  static const BuiltinOptions enum_value = BuiltinOptions_RandomOptions;
+};
+
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
 bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier,
                                 const flatbuffers::Vector<flatbuffers::Offset<void>> *values,
                                 const flatbuffers::Vector<uint8_t> *types);
 
-enum Padding
+enum Padding : int8_t
 {
   Padding_SAME = 0,
   Padding_VALID = 1,
@@ -1685,17 +2028,19 @@ inline const Padding (&EnumValuesPadding())[2]
 
 inline const char *const *EnumNamesPadding()
 {
-  static const char *const names[] = {"SAME", "VALID", nullptr};
+  static const char *const names[3] = {"SAME", "VALID", nullptr};
   return names;
 }
 
 inline const char *EnumNamePadding(Padding e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, Padding_SAME, Padding_VALID))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesPadding()[index];
 }
 
-enum ActivationFunctionType
+enum ActivationFunctionType : int8_t
 {
   ActivationFunctionType_NONE = 0,
   ActivationFunctionType_RELU = 1,
@@ -1717,18 +2062,20 @@ inline const ActivationFunctionType (&EnumValuesActivationFunctionType())[6]
 
 inline const char *const *EnumNamesActivationFunctionType()
 {
-  static const char *const names[] = {"NONE", "RELU",     "RELU_N1_TO_1", "RELU6",
-                                      "TANH", "SIGN_BIT", nullptr};
+  static const char *const names[7] = {"NONE", "RELU",     "RELU_N1_TO_1", "RELU6",
+                                       "TANH", "SIGN_BIT", nullptr};
   return names;
 }
 
 inline const char *EnumNameActivationFunctionType(ActivationFunctionType e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, ActivationFunctionType_NONE, ActivationFunctionType_SIGN_BIT))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesActivationFunctionType()[index];
 }
 
-enum LSHProjectionType
+enum LSHProjectionType : int8_t
 {
   LSHProjectionType_UNKNOWN = 0,
   LSHProjectionType_SPARSE = 1,
@@ -1746,17 +2093,19 @@ inline const LSHProjectionType (&EnumValuesLSHProjectionType())[3]
 
 inline const char *const *EnumNamesLSHProjectionType()
 {
-  static const char *const names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
+  static const char *const names[4] = {"UNKNOWN", "SPARSE", "DENSE", nullptr};
   return names;
 }
 
 inline const char *EnumNameLSHProjectionType(LSHProjectionType e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, LSHProjectionType_UNKNOWN, LSHProjectionType_DENSE))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesLSHProjectionType()[index];
 }
 
-enum FullyConnectedOptionsWeightsFormat
+enum FullyConnectedOptionsWeightsFormat : int8_t
 {
   FullyConnectedOptionsWeightsFormat_DEFAULT = 0,
   FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8 = 1,
@@ -1774,17 +2123,20 @@ inline const FullyConnectedOptionsWeightsFormat (&EnumValuesFullyConnectedOption
 
 inline const char *const *EnumNamesFullyConnectedOptionsWeightsFormat()
 {
-  static const char *const names[] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
+  static const char *const names[3] = {"DEFAULT", "SHUFFLED4x16INT8", nullptr};
   return names;
 }
 
 inline const char *EnumNameFullyConnectedOptionsWeightsFormat(FullyConnectedOptionsWeightsFormat e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, FullyConnectedOptionsWeightsFormat_DEFAULT,
+                              FullyConnectedOptionsWeightsFormat_SHUFFLED4x16INT8))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesFullyConnectedOptionsWeightsFormat()[index];
 }
 
-enum LSTMKernelType
+enum LSTMKernelType : int8_t
 {
   LSTMKernelType_FULL = 0,
   LSTMKernelType_BASIC = 1,
@@ -1800,17 +2152,19 @@ inline const LSTMKernelType (&EnumValuesLSTMKernelType())[2]
 
 inline const char *const *EnumNamesLSTMKernelType()
 {
-  static const char *const names[] = {"FULL", "BASIC", nullptr};
+  static const char *const names[3] = {"FULL", "BASIC", nullptr};
   return names;
 }
 
 inline const char *EnumNameLSTMKernelType(LSTMKernelType e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, LSTMKernelType_FULL, LSTMKernelType_BASIC))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesLSTMKernelType()[index];
 }
 
-enum CombinerType
+enum CombinerType : int8_t
 {
   CombinerType_SUM = 0,
   CombinerType_MEAN = 1,
@@ -1827,17 +2181,19 @@ inline const CombinerType (&EnumValuesCombinerType())[3]
 
 inline const char *const *EnumNamesCombinerType()
 {
-  static const char *const names[] = {"SUM", "MEAN", "SQRTN", nullptr};
+  static const char *const names[4] = {"SUM", "MEAN", "SQRTN", nullptr};
   return names;
 }
 
 inline const char *EnumNameCombinerType(CombinerType e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, CombinerType_SUM, CombinerType_SQRTN))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesCombinerType()[index];
 }
 
-enum MirrorPadMode
+enum MirrorPadMode : int8_t
 {
   MirrorPadMode_REFLECT = 0,
   MirrorPadMode_SYMMETRIC = 1,
@@ -1853,17 +2209,19 @@ inline const MirrorPadMode (&EnumValuesMirrorPadMode())[2]
 
 inline const char *const *EnumNamesMirrorPadMode()
 {
-  static const char *const names[] = {"REFLECT", "SYMMETRIC", nullptr};
+  static const char *const names[3] = {"REFLECT", "SYMMETRIC", nullptr};
   return names;
 }
 
 inline const char *EnumNameMirrorPadMode(MirrorPadMode e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, MirrorPadMode_REFLECT, MirrorPadMode_SYMMETRIC))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesMirrorPadMode()[index];
 }
 
-enum CustomOptionsFormat
+enum CustomOptionsFormat : int8_t
 {
   CustomOptionsFormat_FLEXBUFFERS = 0,
   CustomOptionsFormat_MIN = CustomOptionsFormat_FLEXBUFFERS,
@@ -1878,19 +2236,22 @@ inline const CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1]
 
 inline const char *const *EnumNamesCustomOptionsFormat()
 {
-  static const char *const names[] = {"FLEXBUFFERS", nullptr};
+  static const char *const names[2] = {"FLEXBUFFERS", nullptr};
   return names;
 }
 
 inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e)
 {
-  const size_t index = static_cast<int>(e);
+  if (flatbuffers::IsOutRange(e, CustomOptionsFormat_FLEXBUFFERS, CustomOptionsFormat_FLEXBUFFERS))
+    return "";
+  const size_t index = static_cast<size_t>(e);
   return EnumNamesCustomOptionsFormat()[index];
 }
 
 struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef CustomQuantizationBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_CUSTOM = 4
   };
@@ -1907,6 +2268,7 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct CustomQuantizationBuilder
 {
+  typedef CustomQuantization Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom)
@@ -1917,7 +2279,6 @@ struct CustomQuantizationBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CustomQuantizationBuilder &operator=(const CustomQuantizationBuilder &);
   flatbuffers::Offset<CustomQuantization> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -1939,13 +2300,18 @@ inline flatbuffers::Offset<CustomQuantization>
 CreateCustomQuantizationDirect(flatbuffers::FlatBufferBuilder &_fbb,
                                const std::vector<uint8_t> *custom = nullptr)
 {
-  return onert_tflite::CreateCustomQuantization(_fbb,
-                                                custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
+  if (custom)
+  {
+    _fbb.ForceVectorAlignment(custom->size(), sizeof(uint8_t), 16);
+  }
+  auto custom__ = custom ? _fbb.CreateVector<uint8_t>(*custom) : 0;
+  return onert_tflite::CreateCustomQuantization(_fbb, custom__);
 }
 
 struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef QuantizationParametersBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_MIN = 4,
     VT_MAX = 6,
@@ -1971,16 +2337,16 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
   {
     return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_ZERO_POINT);
   }
-  QuantizationDetails details_type() const
+  onert_tflite::QuantizationDetails details_type() const
   {
-    return static_cast<QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
+    return static_cast<onert_tflite::QuantizationDetails>(GetField<uint8_t>(VT_DETAILS_TYPE, 0));
   }
   const void *details() const { return GetPointer<const void *>(VT_DETAILS); }
   template <typename T> const T *details_as() const;
-  const CustomQuantization *details_as_CustomQuantization() const
+  const onert_tflite::CustomQuantization *details_as_CustomQuantization() const
   {
-    return details_type() == QuantizationDetails_CustomQuantization
-             ? static_cast<const CustomQuantization *>(details())
+    return details_type() == onert_tflite::QuantizationDetails_CustomQuantization
+             ? static_cast<const onert_tflite::CustomQuantization *>(details())
              : nullptr;
   }
   int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
@@ -1998,13 +2364,15 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
 };
 
 template <>
-inline const CustomQuantization *QuantizationParameters::details_as<CustomQuantization>() const
+inline const onert_tflite::CustomQuantization *
+QuantizationParameters::details_as<onert_tflite::CustomQuantization>() const
 {
   return details_as_CustomQuantization();
 }
 
 struct QuantizationParametersBuilder
 {
+  typedef QuantizationParameters Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_min(flatbuffers::Offset<flatbuffers::Vector<float>> min)
@@ -2023,7 +2391,7 @@ struct QuantizationParametersBuilder
   {
     fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point);
   }
-  void add_details_type(QuantizationDetails details_type)
+  void add_details_type(onert_tflite::QuantizationDetails details_type)
   {
     fbb_.AddElement<uint8_t>(QuantizationParameters::VT_DETAILS_TYPE,
                              static_cast<uint8_t>(details_type), 0);
@@ -2041,7 +2409,6 @@ struct QuantizationParametersBuilder
   {
     start_ = fbb_.StartTable();
   }
-  QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &);
   flatbuffers::Offset<QuantizationParameters> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2050,14 +2417,13 @@ struct QuantizationParametersBuilder
   }
 };
 
-inline flatbuffers::Offset<QuantizationParameters>
-CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb,
-                             flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
-                             flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
-                             flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
-                             flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
-                             QuantizationDetails details_type = QuantizationDetails_NONE,
-                             flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
+inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParameters(
+  flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<float>> min = 0,
+  flatbuffers::Offset<flatbuffers::Vector<float>> max = 0,
+  flatbuffers::Offset<flatbuffers::Vector<float>> scale = 0,
+  flatbuffers::Offset<flatbuffers::Vector<int64_t>> zero_point = 0,
+  onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE,
+  flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
 {
   QuantizationParametersBuilder builder_(_fbb);
   builder_.add_quantized_dimension(quantized_dimension);
@@ -2074,19 +2440,21 @@ inline flatbuffers::Offset<QuantizationParameters> CreateQuantizationParametersD
   flatbuffers::FlatBufferBuilder &_fbb, const std::vector<float> *min = nullptr,
   const std::vector<float> *max = nullptr, const std::vector<float> *scale = nullptr,
   const std::vector<int64_t> *zero_point = nullptr,
-  QuantizationDetails details_type = QuantizationDetails_NONE,
+  onert_tflite::QuantizationDetails details_type = onert_tflite::QuantizationDetails_NONE,
   flatbuffers::Offset<void> details = 0, int32_t quantized_dimension = 0)
 {
-  return onert_tflite::CreateQuantizationParameters(
-    _fbb, min ? _fbb.CreateVector<float>(*min) : 0, max ? _fbb.CreateVector<float>(*max) : 0,
-    scale ? _fbb.CreateVector<float>(*scale) : 0,
-    zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0, details_type, details,
-    quantized_dimension);
+  auto min__ = min ? _fbb.CreateVector<float>(*min) : 0;
+  auto max__ = max ? _fbb.CreateVector<float>(*max) : 0;
+  auto scale__ = scale ? _fbb.CreateVector<float>(*scale) : 0;
+  auto zero_point__ = zero_point ? _fbb.CreateVector<int64_t>(*zero_point) : 0;
+  return onert_tflite::CreateQuantizationParameters(_fbb, min__, max__, scale__, zero_point__,
+                                                    details_type, details, quantized_dimension);
 }
 
 struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef Int32VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_VALUES = 4
   };
@@ -2103,6 +2471,7 @@ struct Int32Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct Int32VectorBuilder
 {
+  typedef Int32Vector Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_values(flatbuffers::Offset<flatbuffers::Vector<int32_t>> values)
@@ -2113,7 +2482,6 @@ struct Int32VectorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Int32VectorBuilder &operator=(const Int32VectorBuilder &);
   flatbuffers::Offset<Int32Vector> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2135,12 +2503,14 @@ inline flatbuffers::Offset<Int32Vector>
 CreateInt32VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
                         const std::vector<int32_t> *values = nullptr)
 {
-  return onert_tflite::CreateInt32Vector(_fbb, values ? _fbb.CreateVector<int32_t>(*values) : 0);
+  auto values__ = values ? _fbb.CreateVector<int32_t>(*values) : 0;
+  return onert_tflite::CreateInt32Vector(_fbb, values__);
 }
 
 struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef Uint16VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_VALUES = 4
   };
@@ -2157,6 +2527,7 @@ struct Uint16Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct Uint16VectorBuilder
 {
+  typedef Uint16Vector Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_values(flatbuffers::Offset<flatbuffers::Vector<uint16_t>> values)
@@ -2167,7 +2538,6 @@ struct Uint16VectorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Uint16VectorBuilder &operator=(const Uint16VectorBuilder &);
   flatbuffers::Offset<Uint16Vector> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2189,12 +2559,18 @@ inline flatbuffers::Offset<Uint16Vector>
 CreateUint16VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
                          const std::vector<uint16_t> *values = nullptr)
 {
-  return onert_tflite::CreateUint16Vector(_fbb, values ? _fbb.CreateVector<uint16_t>(*values) : 0);
+  if (values)
+  {
+    _fbb.ForceVectorAlignment(values->size(), sizeof(uint16_t), 4);
+  }
+  auto values__ = values ? _fbb.CreateVector<uint16_t>(*values) : 0;
+  return onert_tflite::CreateUint16Vector(_fbb, values__);
 }
 
 struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef Uint8VectorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_VALUES = 4
   };
@@ -2211,6 +2587,7 @@ struct Uint8Vector FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct Uint8VectorBuilder
 {
+  typedef Uint8Vector Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_values(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> values)
@@ -2221,7 +2598,6 @@ struct Uint8VectorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Uint8VectorBuilder &operator=(const Uint8VectorBuilder &);
   flatbuffers::Offset<Uint8Vector> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2243,12 +2619,18 @@ inline flatbuffers::Offset<Uint8Vector>
 CreateUint8VectorDirect(flatbuffers::FlatBufferBuilder &_fbb,
                         const std::vector<uint8_t> *values = nullptr)
 {
-  return onert_tflite::CreateUint8Vector(_fbb, values ? _fbb.CreateVector<uint8_t>(*values) : 0);
+  if (values)
+  {
+    _fbb.ForceVectorAlignment(values->size(), sizeof(uint8_t), 4);
+  }
+  auto values__ = values ? _fbb.CreateVector<uint8_t>(*values) : 0;
+  return onert_tflite::CreateUint8Vector(_fbb, values__);
 }
 
 struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef DimensionMetadataBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FORMAT = 4,
     VT_DENSE_SIZE = 6,
@@ -2257,57 +2639,59 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_ARRAY_INDICES_TYPE = 12,
     VT_ARRAY_INDICES = 14
   };
-  DimensionType format() const
+  onert_tflite::DimensionType format() const
   {
-    return static_cast<DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
+    return static_cast<onert_tflite::DimensionType>(GetField<int8_t>(VT_FORMAT, 0));
   }
   int32_t dense_size() const { return GetField<int32_t>(VT_DENSE_SIZE, 0); }
-  SparseIndexVector array_segments_type() const
+  onert_tflite::SparseIndexVector array_segments_type() const
   {
-    return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
+    return static_cast<onert_tflite::SparseIndexVector>(
+      GetField<uint8_t>(VT_ARRAY_SEGMENTS_TYPE, 0));
   }
   const void *array_segments() const { return GetPointer<const void *>(VT_ARRAY_SEGMENTS); }
   template <typename T> const T *array_segments_as() const;
-  const Int32Vector *array_segments_as_Int32Vector() const
+  const onert_tflite::Int32Vector *array_segments_as_Int32Vector() const
   {
-    return array_segments_type() == SparseIndexVector_Int32Vector
-             ? static_cast<const Int32Vector *>(array_segments())
+    return array_segments_type() == onert_tflite::SparseIndexVector_Int32Vector
+             ? static_cast<const onert_tflite::Int32Vector *>(array_segments())
              : nullptr;
   }
-  const Uint16Vector *array_segments_as_Uint16Vector() const
+  const onert_tflite::Uint16Vector *array_segments_as_Uint16Vector() const
   {
-    return array_segments_type() == SparseIndexVector_Uint16Vector
-             ? static_cast<const Uint16Vector *>(array_segments())
+    return array_segments_type() == onert_tflite::SparseIndexVector_Uint16Vector
+             ? static_cast<const onert_tflite::Uint16Vector *>(array_segments())
              : nullptr;
   }
-  const Uint8Vector *array_segments_as_Uint8Vector() const
+  const onert_tflite::Uint8Vector *array_segments_as_Uint8Vector() const
   {
-    return array_segments_type() == SparseIndexVector_Uint8Vector
-             ? static_cast<const Uint8Vector *>(array_segments())
+    return array_segments_type() == onert_tflite::SparseIndexVector_Uint8Vector
+             ? static_cast<const onert_tflite::Uint8Vector *>(array_segments())
              : nullptr;
   }
-  SparseIndexVector array_indices_type() const
+  onert_tflite::SparseIndexVector array_indices_type() const
   {
-    return static_cast<SparseIndexVector>(GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
+    return static_cast<onert_tflite::SparseIndexVector>(
+      GetField<uint8_t>(VT_ARRAY_INDICES_TYPE, 0));
   }
   const void *array_indices() const { return GetPointer<const void *>(VT_ARRAY_INDICES); }
   template <typename T> const T *array_indices_as() const;
-  const Int32Vector *array_indices_as_Int32Vector() const
+  const onert_tflite::Int32Vector *array_indices_as_Int32Vector() const
   {
-    return array_indices_type() == SparseIndexVector_Int32Vector
-             ? static_cast<const Int32Vector *>(array_indices())
+    return array_indices_type() == onert_tflite::SparseIndexVector_Int32Vector
+             ? static_cast<const onert_tflite::Int32Vector *>(array_indices())
              : nullptr;
   }
-  const Uint16Vector *array_indices_as_Uint16Vector() const
+  const onert_tflite::Uint16Vector *array_indices_as_Uint16Vector() const
   {
-    return array_indices_type() == SparseIndexVector_Uint16Vector
-             ? static_cast<const Uint16Vector *>(array_indices())
+    return array_indices_type() == onert_tflite::SparseIndexVector_Uint16Vector
+             ? static_cast<const onert_tflite::Uint16Vector *>(array_indices())
              : nullptr;
   }
-  const Uint8Vector *array_indices_as_Uint8Vector() const
+  const onert_tflite::Uint8Vector *array_indices_as_Uint8Vector() const
   {
-    return array_indices_type() == SparseIndexVector_Uint8Vector
-             ? static_cast<const Uint8Vector *>(array_indices())
+    return array_indices_type() == onert_tflite::SparseIndexVector_Uint8Vector
+             ? static_cast<const onert_tflite::Uint8Vector *>(array_indices())
              : nullptr;
   }
   bool Verify(flatbuffers::Verifier &verifier) const
@@ -2324,41 +2708,54 @@ struct DimensionMetadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   }
 };
 
-template <> inline const Int32Vector *DimensionMetadata::array_segments_as<Int32Vector>() const
+template <>
+inline const onert_tflite::Int32Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Int32Vector>() const
 {
   return array_segments_as_Int32Vector();
 }
 
-template <> inline const Uint16Vector *DimensionMetadata::array_segments_as<Uint16Vector>() const
+template <>
+inline const onert_tflite::Uint16Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Uint16Vector>() const
 {
   return array_segments_as_Uint16Vector();
 }
 
-template <> inline const Uint8Vector *DimensionMetadata::array_segments_as<Uint8Vector>() const
+template <>
+inline const onert_tflite::Uint8Vector *
+DimensionMetadata::array_segments_as<onert_tflite::Uint8Vector>() const
 {
   return array_segments_as_Uint8Vector();
 }
 
-template <> inline const Int32Vector *DimensionMetadata::array_indices_as<Int32Vector>() const
+template <>
+inline const onert_tflite::Int32Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Int32Vector>() const
 {
   return array_indices_as_Int32Vector();
 }
 
-template <> inline const Uint16Vector *DimensionMetadata::array_indices_as<Uint16Vector>() const
+template <>
+inline const onert_tflite::Uint16Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Uint16Vector>() const
 {
   return array_indices_as_Uint16Vector();
 }
 
-template <> inline const Uint8Vector *DimensionMetadata::array_indices_as<Uint8Vector>() const
+template <>
+inline const onert_tflite::Uint8Vector *
+DimensionMetadata::array_indices_as<onert_tflite::Uint8Vector>() const
 {
   return array_indices_as_Uint8Vector();
 }
 
 struct DimensionMetadataBuilder
 {
+  typedef DimensionMetadata Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_format(DimensionType format)
+  void add_format(onert_tflite::DimensionType format)
   {
     fbb_.AddElement<int8_t>(DimensionMetadata::VT_FORMAT, static_cast<int8_t>(format), 0);
   }
@@ -2366,7 +2763,7 @@ struct DimensionMetadataBuilder
   {
     fbb_.AddElement<int32_t>(DimensionMetadata::VT_DENSE_SIZE, dense_size, 0);
   }
-  void add_array_segments_type(SparseIndexVector array_segments_type)
+  void add_array_segments_type(onert_tflite::SparseIndexVector array_segments_type)
   {
     fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_SEGMENTS_TYPE,
                              static_cast<uint8_t>(array_segments_type), 0);
@@ -2375,7 +2772,7 @@ struct DimensionMetadataBuilder
   {
     fbb_.AddOffset(DimensionMetadata::VT_ARRAY_SEGMENTS, array_segments);
   }
-  void add_array_indices_type(SparseIndexVector array_indices_type)
+  void add_array_indices_type(onert_tflite::SparseIndexVector array_indices_type)
   {
     fbb_.AddElement<uint8_t>(DimensionMetadata::VT_ARRAY_INDICES_TYPE,
                              static_cast<uint8_t>(array_indices_type), 0);
@@ -2388,7 +2785,6 @@ struct DimensionMetadataBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DimensionMetadataBuilder &operator=(const DimensionMetadataBuilder &);
   flatbuffers::Offset<DimensionMetadata> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2397,13 +2793,13 @@ struct DimensionMetadataBuilder
   }
 };
 
-inline flatbuffers::Offset<DimensionMetadata>
-CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
-                        DimensionType format = DimensionType_DENSE, int32_t dense_size = 0,
-                        SparseIndexVector array_segments_type = SparseIndexVector_NONE,
-                        flatbuffers::Offset<void> array_segments = 0,
-                        SparseIndexVector array_indices_type = SparseIndexVector_NONE,
-                        flatbuffers::Offset<void> array_indices = 0)
+inline flatbuffers::Offset<DimensionMetadata> CreateDimensionMetadata(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  onert_tflite::DimensionType format = onert_tflite::DimensionType_DENSE, int32_t dense_size = 0,
+  onert_tflite::SparseIndexVector array_segments_type = onert_tflite::SparseIndexVector_NONE,
+  flatbuffers::Offset<void> array_segments = 0,
+  onert_tflite::SparseIndexVector array_indices_type = onert_tflite::SparseIndexVector_NONE,
+  flatbuffers::Offset<void> array_indices = 0)
 {
   DimensionMetadataBuilder builder_(_fbb);
   builder_.add_array_indices(array_indices);
@@ -2417,7 +2813,8 @@ CreateDimensionMetadata(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SparsityParametersBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_TRAVERSAL_ORDER = 4,
     VT_BLOCK_MAP = 6,
@@ -2431,9 +2828,11 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_BLOCK_MAP);
   }
-  const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *
+  dim_metadata() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>> *>(
+    return GetPointer<
+      const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *>(
       VT_DIM_METADATA);
   }
   bool Verify(flatbuffers::Verifier &verifier) const
@@ -2448,6 +2847,7 @@ struct SparsityParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SparsityParametersBuilder
 {
+  typedef SparsityParameters Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_traversal_order(flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order)
@@ -2459,7 +2859,8 @@ struct SparsityParametersBuilder
     fbb_.AddOffset(SparsityParameters::VT_BLOCK_MAP, block_map);
   }
   void add_dim_metadata(
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata)
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>>
+      dim_metadata)
   {
     fbb_.AddOffset(SparsityParameters::VT_DIM_METADATA, dim_metadata);
   }
@@ -2467,7 +2868,6 @@ struct SparsityParametersBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SparsityParametersBuilder &operator=(const SparsityParametersBuilder &);
   flatbuffers::Offset<SparsityParameters> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2480,7 +2880,8 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
   flatbuffers::FlatBufferBuilder &_fbb,
   flatbuffers::Offset<flatbuffers::Vector<int32_t>> traversal_order = 0,
   flatbuffers::Offset<flatbuffers::Vector<int32_t>> block_map = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DimensionMetadata>>> dim_metadata = 0)
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>>
+    dim_metadata = 0)
 {
   SparsityParametersBuilder builder_(_fbb);
   builder_.add_dim_metadata(dim_metadata);
@@ -2492,17 +2893,22 @@ inline flatbuffers::Offset<SparsityParameters> CreateSparsityParameters(
 inline flatbuffers::Offset<SparsityParameters> CreateSparsityParametersDirect(
   flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *traversal_order = nullptr,
   const std::vector<int32_t> *block_map = nullptr,
-  const std::vector<flatbuffers::Offset<DimensionMetadata>> *dim_metadata = nullptr)
+  const std::vector<flatbuffers::Offset<onert_tflite::DimensionMetadata>> *dim_metadata = nullptr)
 {
-  return onert_tflite::CreateSparsityParameters(
-    _fbb, traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0,
-    block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0,
-    dim_metadata ? _fbb.CreateVector<flatbuffers::Offset<DimensionMetadata>>(*dim_metadata) : 0);
+  auto traversal_order__ = traversal_order ? _fbb.CreateVector<int32_t>(*traversal_order) : 0;
+  auto block_map__ = block_map ? _fbb.CreateVector<int32_t>(*block_map) : 0;
+  auto dim_metadata__ =
+    dim_metadata
+      ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::DimensionMetadata>>(*dim_metadata)
+      : 0;
+  return onert_tflite::CreateSparsityParameters(_fbb, traversal_order__, block_map__,
+                                                dim_metadata__);
 }
 
 struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef TensorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_SHAPE = 4,
     VT_TYPE = 6,
@@ -2517,20 +2923,23 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_SHAPE);
   }
-  TensorType type() const { return static_cast<TensorType>(GetField<int8_t>(VT_TYPE, 0)); }
+  onert_tflite::TensorType type() const
+  {
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_TYPE, 0));
+  }
   uint32_t buffer() const { return GetField<uint32_t>(VT_BUFFER, 0); }
   const flatbuffers::String *name() const
   {
     return GetPointer<const flatbuffers::String *>(VT_NAME);
   }
-  const QuantizationParameters *quantization() const
+  const onert_tflite::QuantizationParameters *quantization() const
   {
-    return GetPointer<const QuantizationParameters *>(VT_QUANTIZATION);
+    return GetPointer<const onert_tflite::QuantizationParameters *>(VT_QUANTIZATION);
   }
   bool is_variable() const { return GetField<uint8_t>(VT_IS_VARIABLE, 0) != 0; }
-  const SparsityParameters *sparsity() const
+  const onert_tflite::SparsityParameters *sparsity() const
   {
-    return GetPointer<const SparsityParameters *>(VT_SPARSITY);
+    return GetPointer<const onert_tflite::SparsityParameters *>(VT_SPARSITY);
   }
   const flatbuffers::Vector<int32_t> *shape_signature() const
   {
@@ -2551,13 +2960,14 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct TensorBuilder
 {
+  typedef Tensor Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape)
   {
     fbb_.AddOffset(Tensor::VT_SHAPE, shape);
   }
-  void add_type(TensorType type)
+  void add_type(onert_tflite::TensorType type)
   {
     fbb_.AddElement<int8_t>(Tensor::VT_TYPE, static_cast<int8_t>(type), 0);
   }
@@ -2566,7 +2976,7 @@ struct TensorBuilder
   {
     fbb_.AddOffset(Tensor::VT_NAME, name);
   }
-  void add_quantization(flatbuffers::Offset<QuantizationParameters> quantization)
+  void add_quantization(flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization)
   {
     fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization);
   }
@@ -2574,7 +2984,7 @@ struct TensorBuilder
   {
     fbb_.AddElement<uint8_t>(Tensor::VT_IS_VARIABLE, static_cast<uint8_t>(is_variable), 0);
   }
-  void add_sparsity(flatbuffers::Offset<SparsityParameters> sparsity)
+  void add_sparsity(flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity)
   {
     fbb_.AddOffset(Tensor::VT_SPARSITY, sparsity);
   }
@@ -2586,7 +2996,6 @@ struct TensorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TensorBuilder &operator=(const TensorBuilder &);
   flatbuffers::Offset<Tensor> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2595,14 +3004,13 @@ struct TensorBuilder
   }
 };
 
-inline flatbuffers::Offset<Tensor>
-CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
-             flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
-             TensorType type = TensorType_FLOAT32, uint32_t buffer = 0,
-             flatbuffers::Offset<flatbuffers::String> name = 0,
-             flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
-             flatbuffers::Offset<SparsityParameters> sparsity = 0,
-             flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
+inline flatbuffers::Offset<Tensor> CreateTensor(
+  flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape = 0,
+  onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0,
+  flatbuffers::Offset<flatbuffers::String> name = 0,
+  flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0,
+  bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0,
+  flatbuffers::Offset<flatbuffers::Vector<int32_t>> shape_signature = 0)
 {
   TensorBuilder builder_(_fbb);
   builder_.add_shape_signature(shape_signature);
@@ -2618,20 +3026,23 @@ CreateTensor(flatbuffers::FlatBufferBuilder &_fbb,
 
 inline flatbuffers::Offset<Tensor> CreateTensorDirect(
   flatbuffers::FlatBufferBuilder &_fbb, const std::vector<int32_t> *shape = nullptr,
-  TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, const char *name = nullptr,
-  flatbuffers::Offset<QuantizationParameters> quantization = 0, bool is_variable = false,
-  flatbuffers::Offset<SparsityParameters> sparsity = 0,
+  onert_tflite::TensorType type = onert_tflite::TensorType_FLOAT32, uint32_t buffer = 0,
+  const char *name = nullptr,
+  flatbuffers::Offset<onert_tflite::QuantizationParameters> quantization = 0,
+  bool is_variable = false, flatbuffers::Offset<onert_tflite::SparsityParameters> sparsity = 0,
   const std::vector<int32_t> *shape_signature = nullptr)
 {
-  return onert_tflite::CreateTensor(
-    _fbb, shape ? _fbb.CreateVector<int32_t>(*shape) : 0, type, buffer,
-    name ? _fbb.CreateString(name) : 0, quantization, is_variable, sparsity,
-    shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0);
+  auto shape__ = shape ? _fbb.CreateVector<int32_t>(*shape) : 0;
+  auto name__ = name ? _fbb.CreateString(name) : 0;
+  auto shape_signature__ = shape_signature ? _fbb.CreateVector<int32_t>(*shape_signature) : 0;
+  return onert_tflite::CreateTensor(_fbb, shape__, type, buffer, name__, quantization, is_variable,
+                                    sparsity, shape_signature__);
 }
 
 struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef Conv2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_PADDING = 4,
     VT_STRIDE_W = 6,
@@ -2640,12 +3051,16 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_DILATION_W_FACTOR = 12,
     VT_DILATION_H_FACTOR = 14
   };
-  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  onert_tflite::Padding padding() const
+  {
+    return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
   int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
   int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
   int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -2662,9 +3077,10 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct Conv2DOptionsBuilder
 {
+  typedef Conv2DOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_padding(Padding padding)
+  void add_padding(onert_tflite::Padding padding)
   {
     fbb_.AddElement<int8_t>(Conv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
   }
@@ -2676,7 +3092,7 @@ struct Conv2DOptionsBuilder
   {
     fbb_.AddElement<int32_t>(Conv2DOptions::VT_STRIDE_H, stride_h, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -2693,7 +3109,6 @@ struct Conv2DOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &);
   flatbuffers::Offset<Conv2DOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2703,9 +3118,11 @@ struct Conv2DOptionsBuilder
 };
 
 inline flatbuffers::Offset<Conv2DOptions>
-CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                    onert_tflite::Padding padding = onert_tflite::Padding_SAME,
                     int32_t stride_w = 0, int32_t stride_h = 0,
-                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                    onert_tflite::ActivationFunctionType fused_activation_function =
+                      onert_tflite::ActivationFunctionType_NONE,
                     int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
 {
   Conv2DOptionsBuilder builder_(_fbb);
@@ -2718,9 +3135,121 @@ CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
   return builder_.Finish();
 }
 
+struct Conv3DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef Conv3DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_PADDING = 4,
+    VT_STRIDE_D = 6,
+    VT_STRIDE_W = 8,
+    VT_STRIDE_H = 10,
+    VT_FUSED_ACTIVATION_FUNCTION = 12,
+    VT_DILATION_D_FACTOR = 14,
+    VT_DILATION_W_FACTOR = 16,
+    VT_DILATION_H_FACTOR = 18
+  };
+  onert_tflite::Padding padding() const
+  {
+    return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
+  int32_t stride_d() const { return GetField<int32_t>(VT_STRIDE_D, 0); }
+  int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
+  int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
+  onert_tflite::ActivationFunctionType fused_activation_function() const
+  {
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+  }
+  int32_t dilation_d_factor() const { return GetField<int32_t>(VT_DILATION_D_FACTOR, 1); }
+  int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
+  int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_PADDING) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_D) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_W) &&
+           VerifyField<int32_t>(verifier, VT_STRIDE_H) &&
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_D_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_W_FACTOR) &&
+           VerifyField<int32_t>(verifier, VT_DILATION_H_FACTOR) && verifier.EndTable();
+  }
+};
+
+struct Conv3DOptionsBuilder
+{
+  typedef Conv3DOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_padding(onert_tflite::Padding padding)
+  {
+    fbb_.AddElement<int8_t>(Conv3DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
+  }
+  void add_stride_d(int32_t stride_d)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_D, stride_d, 0);
+  }
+  void add_stride_w(int32_t stride_w)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_W, stride_w, 0);
+  }
+  void add_stride_h(int32_t stride_h)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_STRIDE_H, stride_h, 0);
+  }
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
+  {
+    fbb_.AddElement<int8_t>(Conv3DOptions::VT_FUSED_ACTIVATION_FUNCTION,
+                            static_cast<int8_t>(fused_activation_function), 0);
+  }
+  void add_dilation_d_factor(int32_t dilation_d_factor)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_D_FACTOR, dilation_d_factor, 1);
+  }
+  void add_dilation_w_factor(int32_t dilation_w_factor)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_W_FACTOR, dilation_w_factor, 1);
+  }
+  void add_dilation_h_factor(int32_t dilation_h_factor)
+  {
+    fbb_.AddElement<int32_t>(Conv3DOptions::VT_DILATION_H_FACTOR, dilation_h_factor, 1);
+  }
+  explicit Conv3DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Conv3DOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Conv3DOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Conv3DOptions> CreateConv3DOptions(
+  flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+  int32_t stride_d = 0, int32_t stride_w = 0, int32_t stride_h = 0,
+  onert_tflite::ActivationFunctionType fused_activation_function =
+    onert_tflite::ActivationFunctionType_NONE,
+  int32_t dilation_d_factor = 1, int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
+{
+  Conv3DOptionsBuilder builder_(_fbb);
+  builder_.add_dilation_h_factor(dilation_h_factor);
+  builder_.add_dilation_w_factor(dilation_w_factor);
+  builder_.add_dilation_d_factor(dilation_d_factor);
+  builder_.add_stride_h(stride_h);
+  builder_.add_stride_w(stride_w);
+  builder_.add_stride_d(stride_d);
+  builder_.add_fused_activation_function(fused_activation_function);
+  builder_.add_padding(padding);
+  return builder_.Finish();
+}
+
 struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef Pool2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_PADDING = 4,
     VT_STRIDE_W = 6,
@@ -2729,14 +3258,18 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_FILTER_HEIGHT = 12,
     VT_FUSED_ACTIVATION_FUNCTION = 14
   };
-  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  onert_tflite::Padding padding() const
+  {
+    return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
   int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
   int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
   int32_t filter_width() const { return GetField<int32_t>(VT_FILTER_WIDTH, 0); }
   int32_t filter_height() const { return GetField<int32_t>(VT_FILTER_HEIGHT, 0); }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -2751,9 +3284,10 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct Pool2DOptionsBuilder
 {
+  typedef Pool2DOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_padding(Padding padding)
+  void add_padding(onert_tflite::Padding padding)
   {
     fbb_.AddElement<int8_t>(Pool2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
   }
@@ -2773,7 +3307,7 @@ struct Pool2DOptionsBuilder
   {
     fbb_.AddElement<int32_t>(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -2782,7 +3316,6 @@ struct Pool2DOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &);
   flatbuffers::Offset<Pool2DOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2791,11 +3324,11 @@ struct Pool2DOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<Pool2DOptions>
-CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
-                    int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0,
-                    int32_t filter_height = 0,
-                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<Pool2DOptions> CreatePool2DOptions(
+  flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+  int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, int32_t filter_height = 0,
+  onert_tflite::ActivationFunctionType fused_activation_function =
+    onert_tflite::ActivationFunctionType_NONE)
 {
   Pool2DOptionsBuilder builder_(_fbb);
   builder_.add_filter_height(filter_height);
@@ -2809,7 +3342,8 @@ CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padd
 
 struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef DepthwiseConv2DOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_PADDING = 4,
     VT_STRIDE_W = 6,
@@ -2819,13 +3353,17 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
     VT_DILATION_W_FACTOR = 14,
     VT_DILATION_H_FACTOR = 16
   };
-  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  onert_tflite::Padding padding() const
+  {
+    return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
   int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
   int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
   int32_t depth_multiplier() const { return GetField<int32_t>(VT_DEPTH_MULTIPLIER, 0); }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   int32_t dilation_w_factor() const { return GetField<int32_t>(VT_DILATION_W_FACTOR, 1); }
   int32_t dilation_h_factor() const { return GetField<int32_t>(VT_DILATION_H_FACTOR, 1); }
@@ -2843,9 +3381,10 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
 
 struct DepthwiseConv2DOptionsBuilder
 {
+  typedef DepthwiseConv2DOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_padding(Padding padding)
+  void add_padding(onert_tflite::Padding padding)
   {
     fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
   }
@@ -2861,7 +3400,7 @@ struct DepthwiseConv2DOptionsBuilder
   {
     fbb_.AddElement<int32_t>(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -2878,7 +3417,6 @@ struct DepthwiseConv2DOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &);
   flatbuffers::Offset<DepthwiseConv2DOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2888,9 +3426,10 @@ struct DepthwiseConv2DOptionsBuilder
 };
 
 inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
-  flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, int32_t stride_w = 0,
-  int32_t stride_h = 0, int32_t depth_multiplier = 0,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+  flatbuffers::FlatBufferBuilder &_fbb, onert_tflite::Padding padding = onert_tflite::Padding_SAME,
+  int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0,
+  onert_tflite::ActivationFunctionType fused_activation_function =
+    onert_tflite::ActivationFunctionType_NONE,
   int32_t dilation_w_factor = 1, int32_t dilation_h_factor = 1)
 {
   DepthwiseConv2DOptionsBuilder builder_(_fbb);
@@ -2906,7 +3445,8 @@ inline flatbuffers::Offset<DepthwiseConv2DOptions> CreateDepthwiseConv2DOptions(
 
 struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ConcatEmbeddingsOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NUM_CHANNELS = 4,
     VT_NUM_COLUMNS_PER_CHANNEL = 6,
@@ -2933,6 +3473,7 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Ta
 
 struct ConcatEmbeddingsOptionsBuilder
 {
+  typedef ConcatEmbeddingsOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_num_channels(int32_t num_channels)
@@ -2954,7 +3495,6 @@ struct ConcatEmbeddingsOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &);
   flatbuffers::Offset<ConcatEmbeddingsOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -2980,21 +3520,24 @@ CreateConcatEmbeddingsOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, int32_
                                     const std::vector<int32_t> *num_columns_per_channel = nullptr,
                                     const std::vector<int32_t> *embedding_dim_per_channel = nullptr)
 {
-  return onert_tflite::CreateConcatEmbeddingsOptions(
-    _fbb, num_channels,
-    num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0,
-    embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0);
+  auto num_columns_per_channel__ =
+    num_columns_per_channel ? _fbb.CreateVector<int32_t>(*num_columns_per_channel) : 0;
+  auto embedding_dim_per_channel__ =
+    embedding_dim_per_channel ? _fbb.CreateVector<int32_t>(*embedding_dim_per_channel) : 0;
+  return onert_tflite::CreateConcatEmbeddingsOptions(_fbb, num_channels, num_columns_per_channel__,
+                                                     embedding_dim_per_channel__);
 }
 
 struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef LSHProjectionOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_TYPE = 4
   };
-  LSHProjectionType type() const
+  onert_tflite::LSHProjectionType type() const
   {
-    return static_cast<LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
+    return static_cast<onert_tflite::LSHProjectionType>(GetField<int8_t>(VT_TYPE, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -3005,9 +3548,10 @@ struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LSHProjectionOptionsBuilder
 {
+  typedef LSHProjectionOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_type(LSHProjectionType type)
+  void add_type(onert_tflite::LSHProjectionType type)
   {
     fbb_.AddElement<int8_t>(LSHProjectionOptions::VT_TYPE, static_cast<int8_t>(type), 0);
   }
@@ -3015,7 +3559,6 @@ struct LSHProjectionOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &);
   flatbuffers::Offset<LSHProjectionOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3024,9 +3567,9 @@ struct LSHProjectionOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<LSHProjectionOptions>
-CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                           LSHProjectionType type = LSHProjectionType_UNKNOWN)
+inline flatbuffers::Offset<LSHProjectionOptions> CreateLSHProjectionOptions(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  onert_tflite::LSHProjectionType type = onert_tflite::LSHProjectionType_UNKNOWN)
 {
   LSHProjectionOptionsBuilder builder_(_fbb);
   builder_.add_type(type);
@@ -3035,16 +3578,18 @@ CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SVDFOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_RANK = 4,
     VT_FUSED_ACTIVATION_FUNCTION = 6,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
   };
   int32_t rank() const { return GetField<int32_t>(VT_RANK, 0); }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool asymmetric_quantize_inputs() const
   {
@@ -3060,10 +3605,11 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SVDFOptionsBuilder
 {
+  typedef SVDFOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_rank(int32_t rank) { fbb_.AddElement<int32_t>(SVDFOptions::VT_RANK, rank, 0); }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3077,7 +3623,6 @@ struct SVDFOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &);
   flatbuffers::Offset<SVDFOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3088,7 +3633,8 @@ struct SVDFOptionsBuilder
 
 inline flatbuffers::Offset<SVDFOptions>
 CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
-                  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                  onert_tflite::ActivationFunctionType fused_activation_function =
+                    onert_tflite::ActivationFunctionType_NONE,
                   bool asymmetric_quantize_inputs = false)
 {
   SVDFOptionsBuilder builder_(_fbb);
@@ -3100,14 +3646,16 @@ CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0,
 
 struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef RNNOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 6
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool asymmetric_quantize_inputs() const
   {
@@ -3123,9 +3671,10 @@ struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct RNNOptionsBuilder
 {
+  typedef RNNOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3139,7 +3688,6 @@ struct RNNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  RNNOptionsBuilder &operator=(const RNNOptionsBuilder &);
   flatbuffers::Offset<RNNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3150,7 +3698,8 @@ struct RNNOptionsBuilder
 
 inline flatbuffers::Offset<RNNOptions>
 CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                 onert_tflite::ActivationFunctionType fused_activation_function =
+                   onert_tflite::ActivationFunctionType_NONE,
                  bool asymmetric_quantize_inputs = false)
 {
   RNNOptionsBuilder builder_(_fbb);
@@ -3161,16 +3710,18 @@ CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SequenceRNNOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_TIME_MAJOR = 4,
     VT_FUSED_ACTIVATION_FUNCTION = 6,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
   };
   bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool asymmetric_quantize_inputs() const
   {
@@ -3186,6 +3737,7 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SequenceRNNOptionsBuilder
 {
+  typedef SequenceRNNOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_time_major(bool time_major)
@@ -3193,7 +3745,7 @@ struct SequenceRNNOptionsBuilder
     fbb_.AddElement<uint8_t>(SequenceRNNOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major),
                              0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3207,7 +3759,6 @@ struct SequenceRNNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &);
   flatbuffers::Offset<SequenceRNNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3216,10 +3767,11 @@ struct SequenceRNNOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
-  flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
-  bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<SequenceRNNOptions>
+CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
+                         onert_tflite::ActivationFunctionType fused_activation_function =
+                           onert_tflite::ActivationFunctionType_NONE,
+                         bool asymmetric_quantize_inputs = false)
 {
   SequenceRNNOptionsBuilder builder_(_fbb);
   builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3230,7 +3782,8 @@ inline flatbuffers::Offset<SequenceRNNOptions> CreateSequenceRNNOptions(
 
 struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef BidirectionalSequenceRNNOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_TIME_MAJOR = 4,
     VT_FUSED_ACTIVATION_FUNCTION = 6,
@@ -3238,9 +3791,10 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
   };
   bool time_major() const { return GetField<uint8_t>(VT_TIME_MAJOR, 0) != 0; }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool merge_outputs() const { return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0; }
   bool asymmetric_quantize_inputs() const
@@ -3258,6 +3812,7 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuf
 
 struct BidirectionalSequenceRNNOptionsBuilder
 {
+  typedef BidirectionalSequenceRNNOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_time_major(bool time_major)
@@ -3265,7 +3820,7 @@ struct BidirectionalSequenceRNNOptionsBuilder
     fbb_.AddElement<uint8_t>(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR,
                              static_cast<uint8_t>(time_major), 0);
   }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3284,7 +3839,6 @@ struct BidirectionalSequenceRNNOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &);
   flatbuffers::Offset<BidirectionalSequenceRNNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3295,7 +3849,8 @@ struct BidirectionalSequenceRNNOptionsBuilder
 
 inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalSequenceRNNOptions(
   flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+  onert_tflite::ActivationFunctionType fused_activation_function =
+    onert_tflite::ActivationFunctionType_NONE,
   bool merge_outputs = false, bool asymmetric_quantize_inputs = false)
 {
   BidirectionalSequenceRNNOptionsBuilder builder_(_fbb);
@@ -3308,20 +3863,23 @@ inline flatbuffers::Offset<BidirectionalSequenceRNNOptions> CreateBidirectionalS
 
 struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef FullyConnectedOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_WEIGHTS_FORMAT = 6,
     VT_KEEP_NUM_DIMS = 8,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 10
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
-  FullyConnectedOptionsWeightsFormat weights_format() const
+  onert_tflite::FullyConnectedOptionsWeightsFormat weights_format() const
   {
-    return static_cast<FullyConnectedOptionsWeightsFormat>(GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
+    return static_cast<onert_tflite::FullyConnectedOptionsWeightsFormat>(
+      GetField<int8_t>(VT_WEIGHTS_FORMAT, 0));
   }
   bool keep_num_dims() const { return GetField<uint8_t>(VT_KEEP_NUM_DIMS, 0) != 0; }
   bool asymmetric_quantize_inputs() const
@@ -3340,14 +3898,15 @@ struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
 
 struct FullyConnectedOptionsBuilder
 {
+  typedef FullyConnectedOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
   }
-  void add_weights_format(FullyConnectedOptionsWeightsFormat weights_format)
+  void add_weights_format(onert_tflite::FullyConnectedOptionsWeightsFormat weights_format)
   {
     fbb_.AddElement<int8_t>(FullyConnectedOptions::VT_WEIGHTS_FORMAT,
                             static_cast<int8_t>(weights_format), 0);
@@ -3366,7 +3925,6 @@ struct FullyConnectedOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &);
   flatbuffers::Offset<FullyConnectedOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3375,11 +3933,13 @@ struct FullyConnectedOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
-  flatbuffers::FlatBufferBuilder &_fbb,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
-  FullyConnectedOptionsWeightsFormat weights_format = FullyConnectedOptionsWeightsFormat_DEFAULT,
-  bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
+inline flatbuffers::Offset<FullyConnectedOptions>
+CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                            onert_tflite::ActivationFunctionType fused_activation_function =
+                              onert_tflite::ActivationFunctionType_NONE,
+                            onert_tflite::FullyConnectedOptionsWeightsFormat weights_format =
+                              onert_tflite::FullyConnectedOptionsWeightsFormat_DEFAULT,
+                            bool keep_num_dims = false, bool asymmetric_quantize_inputs = false)
 {
   FullyConnectedOptionsBuilder builder_(_fbb);
   builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
@@ -3391,7 +3951,8 @@ inline flatbuffers::Offset<FullyConnectedOptions> CreateFullyConnectedOptions(
 
 struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SoftmaxOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_BETA = 4
   };
@@ -3405,6 +3966,7 @@ struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SoftmaxOptionsBuilder
 {
+  typedef SoftmaxOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_beta(float beta) { fbb_.AddElement<float>(SoftmaxOptions::VT_BETA, beta, 0.0f); }
@@ -3412,7 +3974,6 @@ struct SoftmaxOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &);
   flatbuffers::Offset<SoftmaxOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3431,15 +3992,17 @@ CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f)
 
 struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ConcatenationOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_AXIS = 4,
     VT_FUSED_ACTIVATION_FUNCTION = 6
   };
   int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -3450,10 +4013,11 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ConcatenationOptionsBuilder
 {
+  typedef ConcatenationOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(ConcatenationOptions::VT_AXIS, axis, 0); }
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3462,7 +4026,6 @@ struct ConcatenationOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &);
   flatbuffers::Offset<ConcatenationOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3471,9 +4034,10 @@ struct ConcatenationOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
-  flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+inline flatbuffers::Offset<ConcatenationOptions>
+CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0,
+                           onert_tflite::ActivationFunctionType fused_activation_function =
+                             onert_tflite::ActivationFunctionType_NONE)
 {
   ConcatenationOptionsBuilder builder_(_fbb);
   builder_.add_axis(axis);
@@ -3483,35 +4047,45 @@ inline flatbuffers::Offset<ConcatenationOptions> CreateConcatenationOptions(
 
 struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef AddOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_POT_SCALE_INT16 = 6
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
   }
 };
 
 struct AddOptionsBuilder
 {
+  typedef AddOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(AddOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_pot_scale_int16(bool pot_scale_int16)
+  {
+    fbb_.AddElement<uint8_t>(AddOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+                             1);
+  }
   explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  AddOptionsBuilder &operator=(const AddOptionsBuilder &);
   flatbuffers::Offset<AddOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3522,22 +4096,27 @@ struct AddOptionsBuilder
 
 inline flatbuffers::Offset<AddOptions>
 CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+                 onert_tflite::ActivationFunctionType fused_activation_function =
+                   onert_tflite::ActivationFunctionType_NONE,
+                 bool pot_scale_int16 = true)
 {
   AddOptionsBuilder builder_(_fbb);
+  builder_.add_pot_scale_int16(pot_scale_int16);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
 struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef MulOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -3548,9 +4127,10 @@ struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct MulOptionsBuilder
 {
+  typedef MulOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(MulOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3559,7 +4139,6 @@ struct MulOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MulOptionsBuilder &operator=(const MulOptionsBuilder &);
   flatbuffers::Offset<MulOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3570,7 +4149,8 @@ struct MulOptionsBuilder
 
 inline flatbuffers::Offset<MulOptions>
 CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+                 onert_tflite::ActivationFunctionType fused_activation_function =
+                   onert_tflite::ActivationFunctionType_NONE)
 {
   MulOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
@@ -3579,13 +4159,15 @@ CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef L2NormOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -3596,9 +4178,10 @@ struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct L2NormOptionsBuilder
 {
+  typedef L2NormOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3607,7 +4190,6 @@ struct L2NormOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &);
   flatbuffers::Offset<L2NormOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3618,7 +4200,8 @@ struct L2NormOptionsBuilder
 
 inline flatbuffers::Offset<L2NormOptions>
 CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                    ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+                    onert_tflite::ActivationFunctionType fused_activation_function =
+                      onert_tflite::ActivationFunctionType_NONE)
 {
   L2NormOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
@@ -3627,7 +4210,8 @@ CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef LocalResponseNormalizationOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_RADIUS = 4,
     VT_BIAS = 6,
@@ -3648,6 +4232,7 @@ struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatb
 
 struct LocalResponseNormalizationOptionsBuilder
 {
+  typedef LocalResponseNormalizationOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_radius(int32_t radius)
@@ -3671,8 +4256,6 @@ struct LocalResponseNormalizationOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LocalResponseNormalizationOptionsBuilder &
-  operator=(const LocalResponseNormalizationOptionsBuilder &);
   flatbuffers::Offset<LocalResponseNormalizationOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3695,7 +4278,8 @@ CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, in
 
 struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef LSTMOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_CELL_CLIP = 6,
@@ -3703,15 +4287,16 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_KERNEL_TYPE = 10,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
   float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
-  LSTMKernelType kernel_type() const
+  onert_tflite::LSTMKernelType kernel_type() const
   {
-    return static_cast<LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
+    return static_cast<onert_tflite::LSTMKernelType>(GetField<int8_t>(VT_KERNEL_TYPE, 0));
   }
   bool asymmetric_quantize_inputs() const
   {
@@ -3730,9 +4315,10 @@ struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LSTMOptionsBuilder
 {
+  typedef LSTMOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3745,7 +4331,7 @@ struct LSTMOptionsBuilder
   {
     fbb_.AddElement<float>(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f);
   }
-  void add_kernel_type(LSTMKernelType kernel_type)
+  void add_kernel_type(onert_tflite::LSTMKernelType kernel_type)
   {
     fbb_.AddElement<int8_t>(LSTMOptions::VT_KERNEL_TYPE, static_cast<int8_t>(kernel_type), 0);
   }
@@ -3758,7 +4344,6 @@ struct LSTMOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &);
   flatbuffers::Offset<LSTMOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3769,9 +4354,10 @@ struct LSTMOptionsBuilder
 
 inline flatbuffers::Offset<LSTMOptions>
 CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+                  onert_tflite::ActivationFunctionType fused_activation_function =
+                    onert_tflite::ActivationFunctionType_NONE,
                   float cell_clip = 0.0f, float proj_clip = 0.0f,
-                  LSTMKernelType kernel_type = LSTMKernelType_FULL,
+                  onert_tflite::LSTMKernelType kernel_type = onert_tflite::LSTMKernelType_FULL,
                   bool asymmetric_quantize_inputs = false)
 {
   LSTMOptionsBuilder builder_(_fbb);
@@ -3785,7 +4371,8 @@ CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef UnidirectionalSequenceLSTMOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_CELL_CLIP = 6,
@@ -3793,9 +4380,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
     VT_TIME_MAJOR = 10,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 12
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
   float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -3817,9 +4405,10 @@ struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatb
 
 struct UnidirectionalSequenceLSTMOptionsBuilder
 {
+  typedef UnidirectionalSequenceLSTMOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3847,8 +4436,6 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  UnidirectionalSequenceLSTMOptionsBuilder &
-  operator=(const UnidirectionalSequenceLSTMOptionsBuilder &);
   flatbuffers::Offset<UnidirectionalSequenceLSTMOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3860,7 +4447,8 @@ struct UnidirectionalSequenceLSTMOptionsBuilder
 inline flatbuffers::Offset<UnidirectionalSequenceLSTMOptions>
 CreateUnidirectionalSequenceLSTMOptions(
   flatbuffers::FlatBufferBuilder &_fbb,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+  onert_tflite::ActivationFunctionType fused_activation_function =
+    onert_tflite::ActivationFunctionType_NONE,
   float cell_clip = 0.0f, float proj_clip = 0.0f, bool time_major = false,
   bool asymmetric_quantize_inputs = false)
 {
@@ -3875,7 +4463,8 @@ CreateUnidirectionalSequenceLSTMOptions(
 
 struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef BidirectionalSequenceLSTMOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_CELL_CLIP = 6,
@@ -3884,9 +4473,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
     VT_TIME_MAJOR = 12,
     VT_ASYMMETRIC_QUANTIZE_INPUTS = 14
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   float cell_clip() const { return GetField<float>(VT_CELL_CLIP, 0.0f); }
   float proj_clip() const { return GetField<float>(VT_PROJ_CLIP, 0.0f); }
@@ -3910,9 +4500,10 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
 
 struct BidirectionalSequenceLSTMOptionsBuilder
 {
+  typedef BidirectionalSequenceLSTMOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(BidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -3945,8 +4536,6 @@ struct BidirectionalSequenceLSTMOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BidirectionalSequenceLSTMOptionsBuilder &
-  operator=(const BidirectionalSequenceLSTMOptionsBuilder &);
   flatbuffers::Offset<BidirectionalSequenceLSTMOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -3957,7 +4546,8 @@ struct BidirectionalSequenceLSTMOptionsBuilder
 
 inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectionalSequenceLSTMOptions(
   flatbuffers::FlatBufferBuilder &_fbb,
-  ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
+  onert_tflite::ActivationFunctionType fused_activation_function =
+    onert_tflite::ActivationFunctionType_NONE,
   float cell_clip = 0.0f, float proj_clip = 0.0f, bool merge_outputs = false,
   bool time_major = true, bool asymmetric_quantize_inputs = false)
 {
@@ -3973,7 +4563,8 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional
 
 struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ResizeBilinearOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_ALIGN_CORNERS = 8,
     VT_HALF_PIXEL_CENTERS = 10
@@ -3989,6 +4580,7 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
 
 struct ResizeBilinearOptionsBuilder
 {
+  typedef ResizeBilinearOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_align_corners(bool align_corners)
@@ -4005,7 +4597,6 @@ struct ResizeBilinearOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &);
   flatbuffers::Offset<ResizeBilinearOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4026,20 +4617,24 @@ CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_cor
 
 struct ResizeNearestNeighborOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ResizeNearestNeighborOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_ALIGN_CORNERS = 4
+    VT_ALIGN_CORNERS = 4,
+    VT_HALF_PIXEL_CENTERS = 6
   };
   bool align_corners() const { return GetField<uint8_t>(VT_ALIGN_CORNERS, 0) != 0; }
+  bool half_pixel_centers() const { return GetField<uint8_t>(VT_HALF_PIXEL_CENTERS, 0) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ALIGN_CORNERS) &&
-           verifier.EndTable();
+           VerifyField<uint8_t>(verifier, VT_HALF_PIXEL_CENTERS) && verifier.EndTable();
   }
 };
 
 struct ResizeNearestNeighborOptionsBuilder
 {
+  typedef ResizeNearestNeighborOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_align_corners(bool align_corners)
@@ -4047,11 +4642,15 @@ struct ResizeNearestNeighborOptionsBuilder
     fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_ALIGN_CORNERS,
                              static_cast<uint8_t>(align_corners), 0);
   }
+  void add_half_pixel_centers(bool half_pixel_centers)
+  {
+    fbb_.AddElement<uint8_t>(ResizeNearestNeighborOptions::VT_HALF_PIXEL_CENTERS,
+                             static_cast<uint8_t>(half_pixel_centers), 0);
+  }
   explicit ResizeNearestNeighborOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ResizeNearestNeighborOptionsBuilder &operator=(const ResizeNearestNeighborOptionsBuilder &);
   flatbuffers::Offset<ResizeNearestNeighborOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4061,16 +4660,19 @@ struct ResizeNearestNeighborOptionsBuilder
 };
 
 inline flatbuffers::Offset<ResizeNearestNeighborOptions>
-CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false)
+CreateResizeNearestNeighborOptions(flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false,
+                                   bool half_pixel_centers = false)
 {
   ResizeNearestNeighborOptionsBuilder builder_(_fbb);
+  builder_.add_half_pixel_centers(half_pixel_centers);
   builder_.add_align_corners(align_corners);
   return builder_.Finish();
 }
 
 struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef CallOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_SUBGRAPH = 4
   };
@@ -4084,6 +4686,7 @@ struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct CallOptionsBuilder
 {
+  typedef CallOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_subgraph(uint32_t subgraph)
@@ -4094,7 +4697,6 @@ struct CallOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CallOptionsBuilder &operator=(const CallOptionsBuilder &);
   flatbuffers::Offset<CallOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4113,6 +4715,7 @@ inline flatbuffers::Offset<CallOptions> CreateCallOptions(flatbuffers::FlatBuffe
 
 struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef PadOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4121,13 +4724,13 @@ struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct PadOptionsBuilder
 {
+  typedef PadOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  PadOptionsBuilder &operator=(const PadOptionsBuilder &);
   flatbuffers::Offset<PadOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4144,6 +4747,7 @@ inline flatbuffers::Offset<PadOptions> CreatePadOptions(flatbuffers::FlatBufferB
 
 struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef PadV2OptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4152,13 +4756,13 @@ struct PadV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct PadV2OptionsBuilder
 {
+  typedef PadV2Options Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit PadV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  PadV2OptionsBuilder &operator=(const PadV2OptionsBuilder &);
   flatbuffers::Offset<PadV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4175,7 +4779,8 @@ inline flatbuffers::Offset<PadV2Options> CreatePadV2Options(flatbuffers::FlatBuf
 
 struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ReshapeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NEW_SHAPE = 4
   };
@@ -4192,6 +4797,7 @@ struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ReshapeOptionsBuilder
 {
+  typedef ReshapeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_new_shape(flatbuffers::Offset<flatbuffers::Vector<int32_t>> new_shape)
@@ -4202,7 +4808,6 @@ struct ReshapeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &);
   flatbuffers::Offset<ReshapeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4224,12 +4829,13 @@ inline flatbuffers::Offset<ReshapeOptions>
 CreateReshapeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
                            const std::vector<int32_t> *new_shape = nullptr)
 {
-  return onert_tflite::CreateReshapeOptions(_fbb,
-                                            new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0);
+  auto new_shape__ = new_shape ? _fbb.CreateVector<int32_t>(*new_shape) : 0;
+  return onert_tflite::CreateReshapeOptions(_fbb, new_shape__);
 }
 
 struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SpaceToBatchNDOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4238,13 +4844,13 @@ struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
 
 struct SpaceToBatchNDOptionsBuilder
 {
+  typedef SpaceToBatchNDOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &);
   flatbuffers::Offset<SpaceToBatchNDOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4262,6 +4868,7 @@ CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef BatchToSpaceNDOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4270,13 +4877,13 @@ struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
 
 struct BatchToSpaceNDOptionsBuilder
 {
+  typedef BatchToSpaceNDOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &);
   flatbuffers::Offset<BatchToSpaceNDOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4294,7 +4901,8 @@ CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SkipGramOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NGRAM_SIZE = 4,
     VT_MAX_SKIP_SIZE = 6,
@@ -4313,6 +4921,7 @@ struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SkipGramOptionsBuilder
 {
+  typedef SkipGramOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_ngram_size(int32_t ngram_size)
@@ -4332,7 +4941,6 @@ struct SkipGramOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &);
   flatbuffers::Offset<SkipGramOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4354,7 +4962,8 @@ CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size =
 
 struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SpaceToDepthOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_BLOCK_SIZE = 4
   };
@@ -4368,6 +4977,7 @@ struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SpaceToDepthOptionsBuilder
 {
+  typedef SpaceToDepthOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_block_size(int32_t block_size)
@@ -4378,7 +4988,6 @@ struct SpaceToDepthOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &);
   flatbuffers::Offset<SpaceToDepthOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4397,7 +5006,8 @@ CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
 
 struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef DepthToSpaceOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_BLOCK_SIZE = 4
   };
@@ -4411,6 +5021,7 @@ struct DepthToSpaceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct DepthToSpaceOptionsBuilder
 {
+  typedef DepthToSpaceOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_block_size(int32_t block_size)
@@ -4421,7 +5032,6 @@ struct DepthToSpaceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DepthToSpaceOptionsBuilder &operator=(const DepthToSpaceOptionsBuilder &);
   flatbuffers::Offset<DepthToSpaceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4440,35 +5050,45 @@ CreateDepthToSpaceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t block_si
 
 struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SubOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_FUSED_ACTIVATION_FUNCTION = 4
+    VT_FUSED_ACTIVATION_FUNCTION = 4,
+    VT_POT_SCALE_INT16 = 6
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
+  bool pot_scale_int16() const { return GetField<uint8_t>(VT_POT_SCALE_INT16, 1) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) &&
-           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable();
+           VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
+           VerifyField<uint8_t>(verifier, VT_POT_SCALE_INT16) && verifier.EndTable();
   }
 };
 
 struct SubOptionsBuilder
 {
+  typedef SubOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(SubOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
   }
+  void add_pot_scale_int16(bool pot_scale_int16)
+  {
+    fbb_.AddElement<uint8_t>(SubOptions::VT_POT_SCALE_INT16, static_cast<uint8_t>(pot_scale_int16),
+                             1);
+  }
   explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SubOptionsBuilder &operator=(const SubOptionsBuilder &);
   flatbuffers::Offset<SubOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4479,22 +5099,27 @@ struct SubOptionsBuilder
 
 inline flatbuffers::Offset<SubOptions>
 CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+                 onert_tflite::ActivationFunctionType fused_activation_function =
+                   onert_tflite::ActivationFunctionType_NONE,
+                 bool pot_scale_int16 = true)
 {
   SubOptionsBuilder builder_(_fbb);
+  builder_.add_pot_scale_int16(pot_scale_int16);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
 }
 
 struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef DivOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_FUSED_ACTIVATION_FUNCTION = 4
   };
-  ActivationFunctionType fused_activation_function() const
+  onert_tflite::ActivationFunctionType fused_activation_function() const
   {
-    return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
+    return static_cast<onert_tflite::ActivationFunctionType>(
+      GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -4505,9 +5130,10 @@ struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct DivOptionsBuilder
 {
+  typedef DivOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_fused_activation_function(ActivationFunctionType fused_activation_function)
+  void add_fused_activation_function(onert_tflite::ActivationFunctionType fused_activation_function)
   {
     fbb_.AddElement<int8_t>(DivOptions::VT_FUSED_ACTIVATION_FUNCTION,
                             static_cast<int8_t>(fused_activation_function), 0);
@@ -4516,7 +5142,6 @@ struct DivOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  DivOptionsBuilder &operator=(const DivOptionsBuilder &);
   flatbuffers::Offset<DivOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4527,7 +5152,8 @@ struct DivOptionsBuilder
 
 inline flatbuffers::Offset<DivOptions>
 CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                 ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE)
+                 onert_tflite::ActivationFunctionType fused_activation_function =
+                   onert_tflite::ActivationFunctionType_NONE)
 {
   DivOptionsBuilder builder_(_fbb);
   builder_.add_fused_activation_function(fused_activation_function);
@@ -4536,6 +5162,7 @@ CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef TopKV2OptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4544,13 +5171,13 @@ struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct TopKV2OptionsBuilder
 {
+  typedef TopKV2Options Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &);
   flatbuffers::Offset<TopKV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4567,13 +5194,14 @@ inline flatbuffers::Offset<TopKV2Options> CreateTopKV2Options(flatbuffers::FlatB
 
 struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef EmbeddingLookupSparseOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_COMBINER = 4
   };
-  CombinerType combiner() const
+  onert_tflite::CombinerType combiner() const
   {
-    return static_cast<CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
+    return static_cast<onert_tflite::CombinerType>(GetField<int8_t>(VT_COMBINER, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -4584,9 +5212,10 @@ struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffer
 
 struct EmbeddingLookupSparseOptionsBuilder
 {
+  typedef EmbeddingLookupSparseOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_combiner(CombinerType combiner)
+  void add_combiner(onert_tflite::CombinerType combiner)
   {
     fbb_.AddElement<int8_t>(EmbeddingLookupSparseOptions::VT_COMBINER,
                             static_cast<int8_t>(combiner), 0);
@@ -4595,7 +5224,6 @@ struct EmbeddingLookupSparseOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &);
   flatbuffers::Offset<EmbeddingLookupSparseOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4604,9 +5232,9 @@ struct EmbeddingLookupSparseOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<EmbeddingLookupSparseOptions>
-CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                                   CombinerType combiner = CombinerType_SUM)
+inline flatbuffers::Offset<EmbeddingLookupSparseOptions> CreateEmbeddingLookupSparseOptions(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  onert_tflite::CombinerType combiner = onert_tflite::CombinerType_SUM)
 {
   EmbeddingLookupSparseOptionsBuilder builder_(_fbb);
   builder_.add_combiner(combiner);
@@ -4615,28 +5243,35 @@ CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef GatherOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_AXIS = 4
+    VT_AXIS = 4,
+    VT_BATCH_DIMS = 6
   };
   int32_t axis() const { return GetField<int32_t>(VT_AXIS, 0); }
+  int32_t batch_dims() const { return GetField<int32_t>(VT_BATCH_DIMS, 0); }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_AXIS) &&
-           verifier.EndTable();
+           VerifyField<int32_t>(verifier, VT_BATCH_DIMS) && verifier.EndTable();
   }
 };
 
 struct GatherOptionsBuilder
 {
+  typedef GatherOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(GatherOptions::VT_AXIS, axis, 0); }
+  void add_batch_dims(int32_t batch_dims)
+  {
+    fbb_.AddElement<int32_t>(GatherOptions::VT_BATCH_DIMS, batch_dims, 0);
+  }
   explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  GatherOptionsBuilder &operator=(const GatherOptionsBuilder &);
   flatbuffers::Offset<GatherOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4645,16 +5280,18 @@ struct GatherOptionsBuilder
   }
 };
 
-inline flatbuffers::Offset<GatherOptions> CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                                                              int32_t axis = 0)
+inline flatbuffers::Offset<GatherOptions>
+CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, int32_t batch_dims = 0)
 {
   GatherOptionsBuilder builder_(_fbb);
+  builder_.add_batch_dims(batch_dims);
   builder_.add_axis(axis);
   return builder_.Finish();
 }
 
 struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef TransposeOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4663,13 +5300,13 @@ struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct TransposeOptionsBuilder
 {
+  typedef TransposeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &);
   flatbuffers::Offset<TransposeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4687,6 +5324,7 @@ CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef ExpOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4695,13 +5333,13 @@ struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ExpOptionsBuilder
 {
+  typedef ExpOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ExpOptionsBuilder &operator=(const ExpOptionsBuilder &);
   flatbuffers::Offset<ExpOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4718,6 +5356,7 @@ inline flatbuffers::Offset<ExpOptions> CreateExpOptions(flatbuffers::FlatBufferB
 
 struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef CosOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -4726,13 +5365,13 @@ struct CosOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct CosOptionsBuilder
 {
+  typedef CosOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit CosOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  CosOptionsBuilder &operator=(const CosOptionsBuilder &);
   flatbuffers::Offset<CosOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4749,7 +5388,8 @@ inline flatbuffers::Offset<CosOptions> CreateCosOptions(flatbuffers::FlatBufferB
 
 struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ReducerOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_KEEP_DIMS = 4
   };
@@ -4763,6 +5403,7 @@ struct ReducerOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ReducerOptionsBuilder
 {
+  typedef ReducerOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_keep_dims(bool keep_dims)
@@ -4773,7 +5414,6 @@ struct ReducerOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReducerOptionsBuilder &operator=(const ReducerOptionsBuilder &);
   flatbuffers::Offset<ReducerOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4792,7 +5432,8 @@ CreateReducerOptions(flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = fals
 
 struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SqueezeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_SQUEEZE_DIMS = 4
   };
@@ -4809,6 +5450,7 @@ struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SqueezeOptionsBuilder
 {
+  typedef SqueezeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_squeeze_dims(flatbuffers::Offset<flatbuffers::Vector<int32_t>> squeeze_dims)
@@ -4819,7 +5461,6 @@ struct SqueezeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &);
   flatbuffers::Offset<SqueezeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4841,13 +5482,14 @@ inline flatbuffers::Offset<SqueezeOptions>
 CreateSqueezeOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb,
                            const std::vector<int32_t> *squeeze_dims = nullptr)
 {
-  return onert_tflite::CreateSqueezeOptions(
-    _fbb, squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0);
+  auto squeeze_dims__ = squeeze_dims ? _fbb.CreateVector<int32_t>(*squeeze_dims) : 0;
+  return onert_tflite::CreateSqueezeOptions(_fbb, squeeze_dims__);
 }
 
 struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SplitOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NUM_SPLITS = 4
   };
@@ -4861,6 +5503,7 @@ struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SplitOptionsBuilder
 {
+  typedef SplitOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_num_splits(int32_t num_splits)
@@ -4871,7 +5514,6 @@ struct SplitOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SplitOptionsBuilder &operator=(const SplitOptionsBuilder &);
   flatbuffers::Offset<SplitOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4890,7 +5532,8 @@ inline flatbuffers::Offset<SplitOptions> CreateSplitOptions(flatbuffers::FlatBuf
 
 struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SplitVOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NUM_SPLITS = 4
   };
@@ -4904,6 +5547,7 @@ struct SplitVOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SplitVOptionsBuilder
 {
+  typedef SplitVOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_num_splits(int32_t num_splits)
@@ -4914,7 +5558,6 @@ struct SplitVOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SplitVOptionsBuilder &operator=(const SplitVOptionsBuilder &);
   flatbuffers::Offset<SplitVOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -4933,7 +5576,8 @@ inline flatbuffers::Offset<SplitVOptions> CreateSplitVOptions(flatbuffers::FlatB
 
 struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef StridedSliceOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_BEGIN_MASK = 4,
     VT_END_MASK = 6,
@@ -4958,6 +5602,7 @@ struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct StridedSliceOptionsBuilder
 {
+  typedef StridedSliceOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_begin_mask(int32_t begin_mask)
@@ -4984,7 +5629,6 @@ struct StridedSliceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &);
   flatbuffers::Offset<StridedSliceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5009,6 +5653,7 @@ CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_ma
 
 struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef LogSoftmaxOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5017,13 +5662,13 @@ struct LogSoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LogSoftmaxOptionsBuilder
 {
+  typedef LogSoftmaxOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit LogSoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  LogSoftmaxOptionsBuilder &operator=(const LogSoftmaxOptionsBuilder &);
   flatbuffers::Offset<LogSoftmaxOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5041,18 +5686,19 @@ CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef CastOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_IN_DATA_TYPE = 4,
     VT_OUT_DATA_TYPE = 6
   };
-  TensorType in_data_type() const
+  onert_tflite::TensorType in_data_type() const
   {
-    return static_cast<TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IN_DATA_TYPE, 0));
   }
-  TensorType out_data_type() const
+  onert_tflite::TensorType out_data_type() const
   {
-    return static_cast<TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_DATA_TYPE, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -5063,13 +5709,14 @@ struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct CastOptionsBuilder
 {
+  typedef CastOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_in_data_type(TensorType in_data_type)
+  void add_in_data_type(onert_tflite::TensorType in_data_type)
   {
     fbb_.AddElement<int8_t>(CastOptions::VT_IN_DATA_TYPE, static_cast<int8_t>(in_data_type), 0);
   }
-  void add_out_data_type(TensorType out_data_type)
+  void add_out_data_type(onert_tflite::TensorType out_data_type)
   {
     fbb_.AddElement<int8_t>(CastOptions::VT_OUT_DATA_TYPE, static_cast<int8_t>(out_data_type), 0);
   }
@@ -5077,7 +5724,6 @@ struct CastOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  CastOptionsBuilder &operator=(const CastOptionsBuilder &);
   flatbuffers::Offset<CastOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5088,8 +5734,8 @@ struct CastOptionsBuilder
 
 inline flatbuffers::Offset<CastOptions>
 CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                  TensorType in_data_type = TensorType_FLOAT32,
-                  TensorType out_data_type = TensorType_FLOAT32)
+                  onert_tflite::TensorType in_data_type = onert_tflite::TensorType_FLOAT32,
+                  onert_tflite::TensorType out_data_type = onert_tflite::TensorType_FLOAT32)
 {
   CastOptionsBuilder builder_(_fbb);
   builder_.add_out_data_type(out_data_type);
@@ -5099,6 +5745,7 @@ CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef DequantizeOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5107,13 +5754,13 @@ struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct DequantizeOptionsBuilder
 {
+  typedef DequantizeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &);
   flatbuffers::Offset<DequantizeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5131,6 +5778,7 @@ CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef MaximumMinimumOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5139,13 +5787,13 @@ struct MaximumMinimumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl
 
 struct MaximumMinimumOptionsBuilder
 {
+  typedef MaximumMinimumOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit MaximumMinimumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  MaximumMinimumOptionsBuilder &operator=(const MaximumMinimumOptionsBuilder &);
   flatbuffers::Offset<MaximumMinimumOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5163,6 +5811,7 @@ CreateMaximumMinimumOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef TileOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5171,13 +5820,13 @@ struct TileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct TileOptionsBuilder
 {
+  typedef TileOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit TileOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  TileOptionsBuilder &operator=(const TileOptionsBuilder &);
   flatbuffers::Offset<TileOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5194,13 +5843,14 @@ inline flatbuffers::Offset<TileOptions> CreateTileOptions(flatbuffers::FlatBuffe
 
 struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ArgMaxOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_OUTPUT_TYPE = 4
   };
-  TensorType output_type() const
+  onert_tflite::TensorType output_type() const
   {
-    return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -5211,9 +5861,10 @@ struct ArgMaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ArgMaxOptionsBuilder
 {
+  typedef ArgMaxOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_output_type(TensorType output_type)
+  void add_output_type(onert_tflite::TensorType output_type)
   {
     fbb_.AddElement<int8_t>(ArgMaxOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
   }
@@ -5221,7 +5872,6 @@ struct ArgMaxOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ArgMaxOptionsBuilder &operator=(const ArgMaxOptionsBuilder &);
   flatbuffers::Offset<ArgMaxOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5232,7 +5882,7 @@ struct ArgMaxOptionsBuilder
 
 inline flatbuffers::Offset<ArgMaxOptions>
 CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                    TensorType output_type = TensorType_FLOAT32)
+                    onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32)
 {
   ArgMaxOptionsBuilder builder_(_fbb);
   builder_.add_output_type(output_type);
@@ -5241,13 +5891,14 @@ CreateArgMaxOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ArgMinOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_OUTPUT_TYPE = 4
   };
-  TensorType output_type() const
+  onert_tflite::TensorType output_type() const
   {
-    return static_cast<TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUTPUT_TYPE, 0));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -5258,9 +5909,10 @@ struct ArgMinOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ArgMinOptionsBuilder
 {
+  typedef ArgMinOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_output_type(TensorType output_type)
+  void add_output_type(onert_tflite::TensorType output_type)
   {
     fbb_.AddElement<int8_t>(ArgMinOptions::VT_OUTPUT_TYPE, static_cast<int8_t>(output_type), 0);
   }
@@ -5268,7 +5920,6 @@ struct ArgMinOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ArgMinOptionsBuilder &operator=(const ArgMinOptionsBuilder &);
   flatbuffers::Offset<ArgMinOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5279,7 +5930,7 @@ struct ArgMinOptionsBuilder
 
 inline flatbuffers::Offset<ArgMinOptions>
 CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                    TensorType output_type = TensorType_FLOAT32)
+                    onert_tflite::TensorType output_type = onert_tflite::TensorType_FLOAT32)
 {
   ArgMinOptionsBuilder builder_(_fbb);
   builder_.add_output_type(output_type);
@@ -5288,6 +5939,7 @@ CreateArgMinOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef GreaterOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5296,13 +5948,13 @@ struct GreaterOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct GreaterOptionsBuilder
 {
+  typedef GreaterOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit GreaterOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  GreaterOptionsBuilder &operator=(const GreaterOptionsBuilder &);
   flatbuffers::Offset<GreaterOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5320,6 +5972,7 @@ CreateGreaterOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef GreaterEqualOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5328,13 +5981,13 @@ struct GreaterEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct GreaterEqualOptionsBuilder
 {
+  typedef GreaterEqualOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit GreaterEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  GreaterEqualOptionsBuilder &operator=(const GreaterEqualOptionsBuilder &);
   flatbuffers::Offset<GreaterEqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5352,6 +6005,7 @@ CreateGreaterEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef LessOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5360,13 +6014,13 @@ struct LessOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LessOptionsBuilder
 {
+  typedef LessOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit LessOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  LessOptionsBuilder &operator=(const LessOptionsBuilder &);
   flatbuffers::Offset<LessOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5383,6 +6037,7 @@ inline flatbuffers::Offset<LessOptions> CreateLessOptions(flatbuffers::FlatBuffe
 
 struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef LessEqualOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5391,13 +6046,13 @@ struct LessEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LessEqualOptionsBuilder
 {
+  typedef LessEqualOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit LessEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  LessEqualOptionsBuilder &operator=(const LessEqualOptionsBuilder &);
   flatbuffers::Offset<LessEqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5415,6 +6070,7 @@ CreateLessEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef NegOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5423,13 +6079,13 @@ struct NegOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct NegOptionsBuilder
 {
+  typedef NegOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit NegOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  NegOptionsBuilder &operator=(const NegOptionsBuilder &);
   flatbuffers::Offset<NegOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5446,6 +6102,7 @@ inline flatbuffers::Offset<NegOptions> CreateNegOptions(flatbuffers::FlatBufferB
 
 struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SelectOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5454,13 +6111,13 @@ struct SelectOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SelectOptionsBuilder
 {
+  typedef SelectOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SelectOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SelectOptionsBuilder &operator=(const SelectOptionsBuilder &);
   flatbuffers::Offset<SelectOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5477,6 +6134,7 @@ inline flatbuffers::Offset<SelectOptions> CreateSelectOptions(flatbuffers::FlatB
 
 struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SliceOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5485,13 +6143,13 @@ struct SliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SliceOptionsBuilder
 {
+  typedef SliceOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SliceOptionsBuilder &operator=(const SliceOptionsBuilder &);
   flatbuffers::Offset<SliceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5508,13 +6166,17 @@ inline flatbuffers::Offset<SliceOptions> CreateSliceOptions(flatbuffers::FlatBuf
 
 struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef TransposeConvOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_PADDING = 4,
     VT_STRIDE_W = 6,
     VT_STRIDE_H = 8
   };
-  Padding padding() const { return static_cast<Padding>(GetField<int8_t>(VT_PADDING, 0)); }
+  onert_tflite::Padding padding() const
+  {
+    return static_cast<onert_tflite::Padding>(GetField<int8_t>(VT_PADDING, 0));
+  }
   int32_t stride_w() const { return GetField<int32_t>(VT_STRIDE_W, 0); }
   int32_t stride_h() const { return GetField<int32_t>(VT_STRIDE_H, 0); }
   bool Verify(flatbuffers::Verifier &verifier) const
@@ -5527,9 +6189,10 @@ struct TransposeConvOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct TransposeConvOptionsBuilder
 {
+  typedef TransposeConvOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_padding(Padding padding)
+  void add_padding(onert_tflite::Padding padding)
   {
     fbb_.AddElement<int8_t>(TransposeConvOptions::VT_PADDING, static_cast<int8_t>(padding), 0);
   }
@@ -5545,7 +6208,6 @@ struct TransposeConvOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  TransposeConvOptionsBuilder &operator=(const TransposeConvOptionsBuilder &);
   flatbuffers::Offset<TransposeConvOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5555,7 +6217,8 @@ struct TransposeConvOptionsBuilder
 };
 
 inline flatbuffers::Offset<TransposeConvOptions>
-CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME,
+CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                           onert_tflite::Padding padding = onert_tflite::Padding_SAME,
                            int32_t stride_w = 0, int32_t stride_h = 0)
 {
   TransposeConvOptionsBuilder builder_(_fbb);
@@ -5567,6 +6230,7 @@ CreateTransposeConvOptions(flatbuffers::FlatBufferBuilder &_fbb, Padding padding
 
 struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef ExpandDimsOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5575,13 +6239,13 @@ struct ExpandDimsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ExpandDimsOptionsBuilder
 {
+  typedef ExpandDimsOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit ExpandDimsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ExpandDimsOptionsBuilder &operator=(const ExpandDimsOptionsBuilder &);
   flatbuffers::Offset<ExpandDimsOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5599,7 +6263,8 @@ CreateExpandDimsOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SparseToDenseOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_VALIDATE_INDICES = 4
   };
@@ -5613,6 +6278,7 @@ struct SparseToDenseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SparseToDenseOptionsBuilder
 {
+  typedef SparseToDenseOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_validate_indices(bool validate_indices)
@@ -5624,7 +6290,6 @@ struct SparseToDenseOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SparseToDenseOptionsBuilder &operator=(const SparseToDenseOptionsBuilder &);
   flatbuffers::Offset<SparseToDenseOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5643,6 +6308,7 @@ CreateSparseToDenseOptions(flatbuffers::FlatBufferBuilder &_fbb, bool validate_i
 
 struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef EqualOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5651,13 +6317,13 @@ struct EqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct EqualOptionsBuilder
 {
+  typedef EqualOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit EqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  EqualOptionsBuilder &operator=(const EqualOptionsBuilder &);
   flatbuffers::Offset<EqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5674,6 +6340,7 @@ inline flatbuffers::Offset<EqualOptions> CreateEqualOptions(flatbuffers::FlatBuf
 
 struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef NotEqualOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5682,13 +6349,13 @@ struct NotEqualOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct NotEqualOptionsBuilder
 {
+  typedef NotEqualOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit NotEqualOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  NotEqualOptionsBuilder &operator=(const NotEqualOptionsBuilder &);
   flatbuffers::Offset<NotEqualOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5706,11 +6373,15 @@ CreateNotEqualOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ShapeOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_OUT_TYPE = 4
   };
-  TensorType out_type() const { return static_cast<TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0)); }
+  onert_tflite::TensorType out_type() const
+  {
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_OUT_TYPE, 0));
+  }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_OUT_TYPE) &&
@@ -5720,9 +6391,10 @@ struct ShapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ShapeOptionsBuilder
 {
+  typedef ShapeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_out_type(TensorType out_type)
+  void add_out_type(onert_tflite::TensorType out_type)
   {
     fbb_.AddElement<int8_t>(ShapeOptions::VT_OUT_TYPE, static_cast<int8_t>(out_type), 0);
   }
@@ -5730,7 +6402,6 @@ struct ShapeOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ShapeOptionsBuilder &operator=(const ShapeOptionsBuilder &);
   flatbuffers::Offset<ShapeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5740,7 +6411,8 @@ struct ShapeOptionsBuilder
 };
 
 inline flatbuffers::Offset<ShapeOptions>
-CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = TensorType_FLOAT32)
+CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                   onert_tflite::TensorType out_type = onert_tflite::TensorType_FLOAT32)
 {
   ShapeOptionsBuilder builder_(_fbb);
   builder_.add_out_type(out_type);
@@ -5749,6 +6421,7 @@ CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, TensorType out_type = T
 
 struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef RankOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5757,13 +6430,13 @@ struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct RankOptionsBuilder
 {
+  typedef RankOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  RankOptionsBuilder &operator=(const RankOptionsBuilder &);
   flatbuffers::Offset<RankOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5780,6 +6453,7 @@ inline flatbuffers::Offset<RankOptions> CreateRankOptions(flatbuffers::FlatBuffe
 
 struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef PowOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5788,13 +6462,13 @@ struct PowOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct PowOptionsBuilder
 {
+  typedef PowOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit PowOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  PowOptionsBuilder &operator=(const PowOptionsBuilder &);
   flatbuffers::Offset<PowOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5811,7 +6485,8 @@ inline flatbuffers::Offset<PowOptions> CreatePowOptions(flatbuffers::FlatBufferB
 
 struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef FakeQuantOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_MIN = 4,
     VT_MAX = 6,
@@ -5832,6 +6507,7 @@ struct FakeQuantOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct FakeQuantOptionsBuilder
 {
+  typedef FakeQuantOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_min(float min) { fbb_.AddElement<float>(FakeQuantOptions::VT_MIN, min, 0.0f); }
@@ -5849,7 +6525,6 @@ struct FakeQuantOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  FakeQuantOptionsBuilder &operator=(const FakeQuantOptionsBuilder &);
   flatbuffers::Offset<FakeQuantOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5872,7 +6547,8 @@ CreateFakeQuantOptions(flatbuffers::FlatBufferBuilder &_fbb, float min = 0.0f, f
 
 struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef PackOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_VALUES_COUNT = 4,
     VT_AXIS = 6
@@ -5888,6 +6564,7 @@ struct PackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct PackOptionsBuilder
 {
+  typedef PackOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_values_count(int32_t values_count)
@@ -5899,7 +6576,6 @@ struct PackOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  PackOptionsBuilder &operator=(const PackOptionsBuilder &);
   flatbuffers::Offset<PackOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5919,6 +6595,7 @@ CreatePackOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t values_count = 0
 
 struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef LogicalOrOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5927,13 +6604,13 @@ struct LogicalOrOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LogicalOrOptionsBuilder
 {
+  typedef LogicalOrOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit LogicalOrOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  LogicalOrOptionsBuilder &operator=(const LogicalOrOptionsBuilder &);
   flatbuffers::Offset<LogicalOrOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5951,7 +6628,8 @@ CreateLogicalOrOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef OneHotOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_AXIS = 4
   };
@@ -5965,6 +6643,7 @@ struct OneHotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct OneHotOptionsBuilder
 {
+  typedef OneHotOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_axis(int32_t axis) { fbb_.AddElement<int32_t>(OneHotOptions::VT_AXIS, axis, 0); }
@@ -5972,7 +6651,6 @@ struct OneHotOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  OneHotOptionsBuilder &operator=(const OneHotOptionsBuilder &);
   flatbuffers::Offset<OneHotOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -5991,6 +6669,7 @@ inline flatbuffers::Offset<OneHotOptions> CreateOneHotOptions(flatbuffers::FlatB
 
 struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef AbsOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -5999,13 +6678,13 @@ struct AbsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct AbsOptionsBuilder
 {
+  typedef AbsOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit AbsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  AbsOptionsBuilder &operator=(const AbsOptionsBuilder &);
   flatbuffers::Offset<AbsOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6022,6 +6701,7 @@ inline flatbuffers::Offset<AbsOptions> CreateAbsOptions(flatbuffers::FlatBufferB
 
 struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef HardSwishOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6030,13 +6710,13 @@ struct HardSwishOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct HardSwishOptionsBuilder
 {
+  typedef HardSwishOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit HardSwishOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  HardSwishOptionsBuilder &operator=(const HardSwishOptionsBuilder &);
   flatbuffers::Offset<HardSwishOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6054,6 +6734,7 @@ CreateHardSwishOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef LogicalAndOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6062,13 +6743,13 @@ struct LogicalAndOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LogicalAndOptionsBuilder
 {
+  typedef LogicalAndOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit LogicalAndOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  LogicalAndOptionsBuilder &operator=(const LogicalAndOptionsBuilder &);
   flatbuffers::Offset<LogicalAndOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6086,6 +6767,7 @@ CreateLogicalAndOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef LogicalNotOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6094,13 +6776,13 @@ struct LogicalNotOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LogicalNotOptionsBuilder
 {
+  typedef LogicalNotOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit LogicalNotOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  LogicalNotOptionsBuilder &operator=(const LogicalNotOptionsBuilder &);
   flatbuffers::Offset<LogicalNotOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6118,7 +6800,8 @@ CreateLogicalNotOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef UnpackOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NUM = 4,
     VT_AXIS = 6
@@ -6134,6 +6817,7 @@ struct UnpackOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct UnpackOptionsBuilder
 {
+  typedef UnpackOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_num(int32_t num) { fbb_.AddElement<int32_t>(UnpackOptions::VT_NUM, num, 0); }
@@ -6142,7 +6826,6 @@ struct UnpackOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  UnpackOptionsBuilder &operator=(const UnpackOptionsBuilder &);
   flatbuffers::Offset<UnpackOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6162,6 +6845,7 @@ inline flatbuffers::Offset<UnpackOptions> CreateUnpackOptions(flatbuffers::FlatB
 
 struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef FloorDivOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6170,13 +6854,13 @@ struct FloorDivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct FloorDivOptionsBuilder
 {
+  typedef FloorDivOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit FloorDivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  FloorDivOptionsBuilder &operator=(const FloorDivOptionsBuilder &);
   flatbuffers::Offset<FloorDivOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6194,6 +6878,7 @@ CreateFloorDivOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SquareOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6202,13 +6887,13 @@ struct SquareOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SquareOptionsBuilder
 {
+  typedef SquareOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SquareOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SquareOptionsBuilder &operator=(const SquareOptionsBuilder &);
   flatbuffers::Offset<SquareOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6225,6 +6910,7 @@ inline flatbuffers::Offset<SquareOptions> CreateSquareOptions(flatbuffers::FlatB
 
 struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef ZerosLikeOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6233,13 +6919,13 @@ struct ZerosLikeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ZerosLikeOptionsBuilder
 {
+  typedef ZerosLikeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit ZerosLikeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ZerosLikeOptionsBuilder &operator=(const ZerosLikeOptionsBuilder &);
   flatbuffers::Offset<ZerosLikeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6257,6 +6943,7 @@ CreateZerosLikeOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef FillOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6265,13 +6952,13 @@ struct FillOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct FillOptionsBuilder
 {
+  typedef FillOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit FillOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  FillOptionsBuilder &operator=(const FillOptionsBuilder &);
   flatbuffers::Offset<FillOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6288,6 +6975,7 @@ inline flatbuffers::Offset<FillOptions> CreateFillOptions(flatbuffers::FlatBuffe
 
 struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef FloorModOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6296,13 +6984,13 @@ struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct FloorModOptionsBuilder
 {
+  typedef FloorModOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  FloorModOptionsBuilder &operator=(const FloorModOptionsBuilder &);
   flatbuffers::Offset<FloorModOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6320,6 +7008,7 @@ CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef RangeOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6328,13 +7017,13 @@ struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct RangeOptionsBuilder
 {
+  typedef RangeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  RangeOptionsBuilder &operator=(const RangeOptionsBuilder &);
   flatbuffers::Offset<RangeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6351,7 +7040,8 @@ inline flatbuffers::Offset<RangeOptions> CreateRangeOptions(flatbuffers::FlatBuf
 
 struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef LeakyReluOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_ALPHA = 4
   };
@@ -6365,6 +7055,7 @@ struct LeakyReluOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct LeakyReluOptionsBuilder
 {
+  typedef LeakyReluOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_alpha(float alpha) { fbb_.AddElement<float>(LeakyReluOptions::VT_ALPHA, alpha, 0.0f); }
@@ -6372,7 +7063,6 @@ struct LeakyReluOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  LeakyReluOptionsBuilder &operator=(const LeakyReluOptionsBuilder &);
   flatbuffers::Offset<LeakyReluOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6391,6 +7081,7 @@ CreateLeakyReluOptions(flatbuffers::FlatBufferBuilder &_fbb, float alpha = 0.0f)
 
 struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SquaredDifferenceOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6399,13 +7090,13 @@ struct SquaredDifferenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::T
 
 struct SquaredDifferenceOptionsBuilder
 {
+  typedef SquaredDifferenceOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SquaredDifferenceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SquaredDifferenceOptionsBuilder &operator=(const SquaredDifferenceOptionsBuilder &);
   flatbuffers::Offset<SquaredDifferenceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6423,11 +7114,15 @@ CreateSquaredDifferenceOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef MirrorPadOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_MODE = 4
   };
-  MirrorPadMode mode() const { return static_cast<MirrorPadMode>(GetField<int8_t>(VT_MODE, 0)); }
+  onert_tflite::MirrorPadMode mode() const
+  {
+    return static_cast<onert_tflite::MirrorPadMode>(GetField<int8_t>(VT_MODE, 0));
+  }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_MODE) &&
@@ -6437,9 +7132,10 @@ struct MirrorPadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct MirrorPadOptionsBuilder
 {
+  typedef MirrorPadOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_mode(MirrorPadMode mode)
+  void add_mode(onert_tflite::MirrorPadMode mode)
   {
     fbb_.AddElement<int8_t>(MirrorPadOptions::VT_MODE, static_cast<int8_t>(mode), 0);
   }
@@ -6447,7 +7143,6 @@ struct MirrorPadOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MirrorPadOptionsBuilder &operator=(const MirrorPadOptionsBuilder &);
   flatbuffers::Offset<MirrorPadOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6458,7 +7153,7 @@ struct MirrorPadOptionsBuilder
 
 inline flatbuffers::Offset<MirrorPadOptions>
 CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                       MirrorPadMode mode = MirrorPadMode_REFLECT)
+                       onert_tflite::MirrorPadMode mode = onert_tflite::MirrorPadMode_REFLECT)
 {
   MirrorPadOptionsBuilder builder_(_fbb);
   builder_.add_mode(mode);
@@ -6467,13 +7162,14 @@ CreateMirrorPadOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef UniqueOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_IDX_OUT_TYPE = 4
   };
-  TensorType idx_out_type() const
+  onert_tflite::TensorType idx_out_type() const
   {
-    return static_cast<TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_IDX_OUT_TYPE, 2));
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -6484,9 +7180,10 @@ struct UniqueOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct UniqueOptionsBuilder
 {
+  typedef UniqueOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_idx_out_type(TensorType idx_out_type)
+  void add_idx_out_type(onert_tflite::TensorType idx_out_type)
   {
     fbb_.AddElement<int8_t>(UniqueOptions::VT_IDX_OUT_TYPE, static_cast<int8_t>(idx_out_type), 2);
   }
@@ -6494,7 +7191,6 @@ struct UniqueOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  UniqueOptionsBuilder &operator=(const UniqueOptionsBuilder &);
   flatbuffers::Offset<UniqueOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6505,7 +7201,7 @@ struct UniqueOptionsBuilder
 
 inline flatbuffers::Offset<UniqueOptions>
 CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
-                    TensorType idx_out_type = TensorType_INT32)
+                    onert_tflite::TensorType idx_out_type = onert_tflite::TensorType_INT32)
 {
   UniqueOptionsBuilder builder_(_fbb);
   builder_.add_idx_out_type(idx_out_type);
@@ -6514,6 +7210,7 @@ CreateUniqueOptions(flatbuffers::FlatBufferBuilder &_fbb,
 
 struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef ReverseV2OptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6522,13 +7219,13 @@ struct ReverseV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ReverseV2OptionsBuilder
 {
+  typedef ReverseV2Options Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit ReverseV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ReverseV2OptionsBuilder &operator=(const ReverseV2OptionsBuilder &);
   flatbuffers::Offset<ReverseV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6546,6 +7243,7 @@ CreateReverseV2Options(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef AddNOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6554,13 +7252,13 @@ struct AddNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct AddNOptionsBuilder
 {
+  typedef AddNOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit AddNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  AddNOptionsBuilder &operator=(const AddNOptionsBuilder &);
   flatbuffers::Offset<AddNOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6577,6 +7275,7 @@ inline flatbuffers::Offset<AddNOptions> CreateAddNOptions(flatbuffers::FlatBuffe
 
 struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef GatherNdOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6585,13 +7284,13 @@ struct GatherNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct GatherNdOptionsBuilder
 {
+  typedef GatherNdOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit GatherNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  GatherNdOptionsBuilder &operator=(const GatherNdOptionsBuilder &);
   flatbuffers::Offset<GatherNdOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6609,6 +7308,7 @@ CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef WhereOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6617,13 +7317,13 @@ struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct WhereOptionsBuilder
 {
+  typedef WhereOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  WhereOptionsBuilder &operator=(const WhereOptionsBuilder &);
   flatbuffers::Offset<WhereOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6640,7 +7340,8 @@ inline flatbuffers::Offset<WhereOptions> CreateWhereOptions(flatbuffers::FlatBuf
 
 struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ReverseSequenceOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_SEQ_DIM = 4,
     VT_BATCH_DIM = 6
@@ -6656,6 +7357,7 @@ struct ReverseSequenceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tab
 
 struct ReverseSequenceOptionsBuilder
 {
+  typedef ReverseSequenceOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_seq_dim(int32_t seq_dim)
@@ -6670,7 +7372,6 @@ struct ReverseSequenceOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  ReverseSequenceOptionsBuilder &operator=(const ReverseSequenceOptionsBuilder &);
   flatbuffers::Offset<ReverseSequenceOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6691,6 +7392,7 @@ CreateReverseSequenceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t seq_d
 
 struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef MatrixDiagOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6699,13 +7401,13 @@ struct MatrixDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct MatrixDiagOptionsBuilder
 {
+  typedef MatrixDiagOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit MatrixDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  MatrixDiagOptionsBuilder &operator=(const MatrixDiagOptionsBuilder &);
   flatbuffers::Offset<MatrixDiagOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6723,6 +7425,7 @@ CreateMatrixDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef QuantizeOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6731,13 +7434,13 @@ struct QuantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct QuantizeOptionsBuilder
 {
+  typedef QuantizeOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit QuantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  QuantizeOptionsBuilder &operator=(const QuantizeOptionsBuilder &);
   flatbuffers::Offset<QuantizeOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6755,6 +7458,7 @@ CreateQuantizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef MatrixSetDiagOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6763,13 +7467,13 @@ struct MatrixSetDiagOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct MatrixSetDiagOptionsBuilder
 {
+  typedef MatrixSetDiagOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit MatrixSetDiagOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  MatrixSetDiagOptionsBuilder &operator=(const MatrixSetDiagOptionsBuilder &);
   flatbuffers::Offset<MatrixSetDiagOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6787,7 +7491,8 @@ CreateMatrixSetDiagOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef IfOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_THEN_SUBGRAPH_INDEX = 4,
     VT_ELSE_SUBGRAPH_INDEX = 6
@@ -6803,6 +7508,7 @@ struct IfOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct IfOptionsBuilder
 {
+  typedef IfOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_then_subgraph_index(int32_t then_subgraph_index)
@@ -6817,7 +7523,6 @@ struct IfOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  IfOptionsBuilder &operator=(const IfOptionsBuilder &);
   flatbuffers::Offset<IfOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6836,9 +7541,54 @@ inline flatbuffers::Offset<IfOptions> CreateIfOptions(flatbuffers::FlatBufferBui
   return builder_.Finish();
 }
 
+struct CallOnceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef CallOnceOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_INIT_SUBGRAPH_INDEX = 4
+  };
+  int32_t init_subgraph_index() const { return GetField<int32_t>(VT_INIT_SUBGRAPH_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_INIT_SUBGRAPH_INDEX) &&
+           verifier.EndTable();
+  }
+};
+
+struct CallOnceOptionsBuilder
+{
+  typedef CallOnceOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_init_subgraph_index(int32_t init_subgraph_index)
+  {
+    fbb_.AddElement<int32_t>(CallOnceOptions::VT_INIT_SUBGRAPH_INDEX, init_subgraph_index, 0);
+  }
+  explicit CallOnceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<CallOnceOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<CallOnceOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<CallOnceOptions>
+CreateCallOnceOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t init_subgraph_index = 0)
+{
+  CallOnceOptionsBuilder builder_(_fbb);
+  builder_.add_init_subgraph_index(init_subgraph_index);
+  return builder_.Finish();
+}
+
 struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef WhileOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_COND_SUBGRAPH_INDEX = 4,
     VT_BODY_SUBGRAPH_INDEX = 6
@@ -6854,6 +7604,7 @@ struct WhileOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct WhileOptionsBuilder
 {
+  typedef WhileOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_cond_subgraph_index(int32_t cond_subgraph_index)
@@ -6868,7 +7619,6 @@ struct WhileOptionsBuilder
   {
     start_ = fbb_.StartTable();
   }
-  WhileOptionsBuilder &operator=(const WhileOptionsBuilder &);
   flatbuffers::Offset<WhileOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6889,6 +7639,7 @@ inline flatbuffers::Offset<WhileOptions> CreateWhileOptions(flatbuffers::FlatBuf
 
 struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef NonMaxSuppressionV4OptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6897,13 +7648,13 @@ struct NonMaxSuppressionV4Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
 
 struct NonMaxSuppressionV4OptionsBuilder
 {
+  typedef NonMaxSuppressionV4Options Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit NonMaxSuppressionV4OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  NonMaxSuppressionV4OptionsBuilder &operator=(const NonMaxSuppressionV4OptionsBuilder &);
   flatbuffers::Offset<NonMaxSuppressionV4Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6921,6 +7672,7 @@ CreateNonMaxSuppressionV4Options(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef NonMaxSuppressionV5OptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6929,13 +7681,13 @@ struct NonMaxSuppressionV5Options FLATBUFFERS_FINAL_CLASS : private flatbuffers:
 
 struct NonMaxSuppressionV5OptionsBuilder
 {
+  typedef NonMaxSuppressionV5Options Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit NonMaxSuppressionV5OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  NonMaxSuppressionV5OptionsBuilder &operator=(const NonMaxSuppressionV5OptionsBuilder &);
   flatbuffers::Offset<NonMaxSuppressionV5Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6953,6 +7705,7 @@ CreateNonMaxSuppressionV5Options(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef ScatterNdOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6961,13 +7714,13 @@ struct ScatterNdOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct ScatterNdOptionsBuilder
 {
+  typedef ScatterNdOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit ScatterNdOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ScatterNdOptionsBuilder &operator=(const ScatterNdOptionsBuilder &);
   flatbuffers::Offset<ScatterNdOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -6985,6 +7738,7 @@ CreateScatterNdOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SelectV2OptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -6993,13 +7747,13 @@ struct SelectV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SelectV2OptionsBuilder
 {
+  typedef SelectV2Options Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SelectV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SelectV2OptionsBuilder &operator=(const SelectV2OptionsBuilder &);
   flatbuffers::Offset<SelectV2Options> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7017,6 +7771,7 @@ CreateSelectV2Options(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef DensifyOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7025,13 +7780,13 @@ struct DensifyOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct DensifyOptionsBuilder
 {
+  typedef DensifyOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit DensifyOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  DensifyOptionsBuilder &operator=(const DensifyOptionsBuilder &);
   flatbuffers::Offset<DensifyOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7049,6 +7804,7 @@ CreateDensifyOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
+  typedef SegmentSumOptionsBuilder Builder;
   bool Verify(flatbuffers::Verifier &verifier) const
   {
     return VerifyTableStart(verifier) && verifier.EndTable();
@@ -7057,13 +7813,13 @@ struct SegmentSumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SegmentSumOptionsBuilder
 {
+  typedef SegmentSumOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   explicit SegmentSumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  SegmentSumOptionsBuilder &operator=(const SegmentSumOptionsBuilder &);
   flatbuffers::Offset<SegmentSumOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7081,39 +7837,49 @@ CreateSegmentSumOptions(flatbuffers::FlatBufferBuilder &_fbb)
 
 struct BatchMatMulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef BatchMatMulOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_ADJOINT_LHS = 4,
-    VT_ADJOINT_RHS = 6
+    VT_ADJ_X = 4,
+    VT_ADJ_Y = 6,
+    VT_ASYMMETRIC_QUANTIZE_INPUTS = 8
   };
-  bool adjoint_lhs() const { return GetField<uint8_t>(VT_ADJOINT_LHS, 0) != 0; }
-  bool adjoint_rhs() const { return GetField<uint8_t>(VT_ADJOINT_RHS, 0) != 0; }
+  bool adj_x() const { return GetField<uint8_t>(VT_ADJ_X, 0) != 0; }
+  bool adj_y() const { return GetField<uint8_t>(VT_ADJ_Y, 0) != 0; }
+  bool asymmetric_quantize_inputs() const
+  {
+    return GetField<uint8_t>(VT_ASYMMETRIC_QUANTIZE_INPUTS, 0) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
-    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJOINT_LHS) &&
-           VerifyField<uint8_t>(verifier, VT_ADJOINT_RHS) && verifier.EndTable();
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_ADJ_X) &&
+           VerifyField<uint8_t>(verifier, VT_ADJ_Y) &&
+           VerifyField<uint8_t>(verifier, VT_ASYMMETRIC_QUANTIZE_INPUTS) && verifier.EndTable();
   }
 };
 
 struct BatchMatMulOptionsBuilder
 {
+  typedef BatchMatMulOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_adjoint_lhs(bool adjoint_lhs)
+  void add_adj_x(bool adj_x)
   {
-    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_LHS, static_cast<uint8_t>(adjoint_lhs),
-                             0);
+    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_X, static_cast<uint8_t>(adj_x), 0);
   }
-  void add_adjoint_rhs(bool adjoint_rhs)
+  void add_adj_y(bool adj_y)
   {
-    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJOINT_RHS, static_cast<uint8_t>(adjoint_rhs),
-                             0);
+    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ADJ_Y, static_cast<uint8_t>(adj_y), 0);
+  }
+  void add_asymmetric_quantize_inputs(bool asymmetric_quantize_inputs)
+  {
+    fbb_.AddElement<uint8_t>(BatchMatMulOptions::VT_ASYMMETRIC_QUANTIZE_INPUTS,
+                             static_cast<uint8_t>(asymmetric_quantize_inputs), 0);
   }
   explicit BatchMatMulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  BatchMatMulOptionsBuilder &operator=(const BatchMatMulOptionsBuilder &);
   flatbuffers::Offset<BatchMatMulOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -7123,95 +7889,571 @@ struct BatchMatMulOptionsBuilder
 };
 
 inline flatbuffers::Offset<BatchMatMulOptions>
-CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adjoint_lhs = false,
-                         bool adjoint_rhs = false)
+CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, bool adj_x = false,
+                         bool adj_y = false, bool asymmetric_quantize_inputs = false)
 {
   BatchMatMulOptionsBuilder builder_(_fbb);
-  builder_.add_adjoint_rhs(adjoint_rhs);
-  builder_.add_adjoint_lhs(adjoint_lhs);
+  builder_.add_asymmetric_quantize_inputs(asymmetric_quantize_inputs);
+  builder_.add_adj_y(adj_y);
+  builder_.add_adj_x(adj_x);
   return builder_.Finish();
 }
 
-struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct CumsumOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef CumsumOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
-    VT_BUILTIN_CODE = 4,
-    VT_CUSTOM_CODE = 6,
-    VT_VERSION = 8
+    VT_EXCLUSIVE = 4,
+    VT_REVERSE = 6
   };
-  BuiltinOperator builtin_code() const
-  {
-    return static_cast<BuiltinOperator>(GetField<int8_t>(VT_BUILTIN_CODE, 0));
-  }
-  const flatbuffers::String *custom_code() const
-  {
-    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
-  }
-  int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+  bool exclusive() const { return GetField<uint8_t>(VT_EXCLUSIVE, 0) != 0; }
+  bool reverse() const { return GetField<uint8_t>(VT_REVERSE, 0) != 0; }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
-    return VerifyTableStart(verifier) && VerifyField<int8_t>(verifier, VT_BUILTIN_CODE) &&
-           VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
-           VerifyField<int32_t>(verifier, VT_VERSION) && verifier.EndTable();
+    return VerifyTableStart(verifier) && VerifyField<uint8_t>(verifier, VT_EXCLUSIVE) &&
+           VerifyField<uint8_t>(verifier, VT_REVERSE) && verifier.EndTable();
   }
 };
 
-struct OperatorCodeBuilder
+struct CumsumOptionsBuilder
 {
+  typedef CumsumOptions Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_builtin_code(BuiltinOperator builtin_code)
-  {
-    fbb_.AddElement<int8_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int8_t>(builtin_code), 0);
-  }
-  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
+  void add_exclusive(bool exclusive)
   {
-    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+    fbb_.AddElement<uint8_t>(CumsumOptions::VT_EXCLUSIVE, static_cast<uint8_t>(exclusive), 0);
   }
-  void add_version(int32_t version)
+  void add_reverse(bool reverse)
   {
-    fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+    fbb_.AddElement<uint8_t>(CumsumOptions::VT_REVERSE, static_cast<uint8_t>(reverse), 0);
   }
-  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  explicit CumsumOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  OperatorCodeBuilder &operator=(const OperatorCodeBuilder &);
-  flatbuffers::Offset<OperatorCode> Finish()
+  flatbuffers::Offset<CumsumOptions> Finish()
   {
     const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<OperatorCode>(end);
+    auto o = flatbuffers::Offset<CumsumOptions>(end);
     return o;
   }
 };
 
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb,
-                   BuiltinOperator builtin_code = BuiltinOperator_ADD,
-                   flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1)
+inline flatbuffers::Offset<CumsumOptions> CreateCumsumOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              bool exclusive = false,
+                                                              bool reverse = false)
 {
-  OperatorCodeBuilder builder_(_fbb);
-  builder_.add_version(version);
-  builder_.add_custom_code(custom_code);
-  builder_.add_builtin_code(builtin_code);
+  CumsumOptionsBuilder builder_(_fbb);
+  builder_.add_reverse(reverse);
+  builder_.add_exclusive(exclusive);
   return builder_.Finish();
 }
 
-inline flatbuffers::Offset<OperatorCode>
-CreateOperatorCodeDirect(flatbuffers::FlatBufferBuilder &_fbb,
-                         BuiltinOperator builtin_code = BuiltinOperator_ADD,
-                         const char *custom_code = nullptr, int32_t version = 1)
+struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  return onert_tflite::CreateOperatorCode(
-    _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0, version);
-}
+  typedef BroadcastToOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
 
-struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+struct BroadcastToOptionsBuilder
 {
-  enum
+  typedef BroadcastToOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
-    VT_OPCODE_INDEX = 4,
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<BroadcastToOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<BroadcastToOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<BroadcastToOptions>
+CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  BroadcastToOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct Rfft2dOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef Rfft2dOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct Rfft2dOptionsBuilder
+{
+  typedef Rfft2dOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit Rfft2dOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<Rfft2dOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<Rfft2dOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<Rfft2dOptions> CreateRfft2dOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  Rfft2dOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct HashtableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_TABLE_ID = 4,
+    VT_KEY_DTYPE = 6,
+    VT_VALUE_DTYPE = 8
+  };
+  int32_t table_id() const { return GetField<int32_t>(VT_TABLE_ID, 0); }
+  onert_tflite::TensorType key_dtype() const
+  {
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_KEY_DTYPE, 0));
+  }
+  onert_tflite::TensorType value_dtype() const
+  {
+    return static_cast<onert_tflite::TensorType>(GetField<int8_t>(VT_VALUE_DTYPE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_TABLE_ID) &&
+           VerifyField<int8_t>(verifier, VT_KEY_DTYPE) &&
+           VerifyField<int8_t>(verifier, VT_VALUE_DTYPE) && verifier.EndTable();
+  }
+};
+
+struct HashtableOptionsBuilder
+{
+  typedef HashtableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_table_id(int32_t table_id)
+  {
+    fbb_.AddElement<int32_t>(HashtableOptions::VT_TABLE_ID, table_id, 0);
+  }
+  void add_key_dtype(onert_tflite::TensorType key_dtype)
+  {
+    fbb_.AddElement<int8_t>(HashtableOptions::VT_KEY_DTYPE, static_cast<int8_t>(key_dtype), 0);
+  }
+  void add_value_dtype(onert_tflite::TensorType value_dtype)
+  {
+    fbb_.AddElement<int8_t>(HashtableOptions::VT_VALUE_DTYPE, static_cast<int8_t>(value_dtype), 0);
+  }
+  explicit HashtableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableOptions>
+CreateHashtableOptions(flatbuffers::FlatBufferBuilder &_fbb, int32_t table_id = 0,
+                       onert_tflite::TensorType key_dtype = onert_tflite::TensorType_FLOAT32,
+                       onert_tflite::TensorType value_dtype = onert_tflite::TensorType_FLOAT32)
+{
+  HashtableOptionsBuilder builder_(_fbb);
+  builder_.add_table_id(table_id);
+  builder_.add_value_dtype(value_dtype);
+  builder_.add_key_dtype(key_dtype);
+  return builder_.Finish();
+}
+
+struct HashtableFindOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableFindOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct HashtableFindOptionsBuilder
+{
+  typedef HashtableFindOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit HashtableFindOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableFindOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableFindOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableFindOptions>
+CreateHashtableFindOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  HashtableFindOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct HashtableImportOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableImportOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct HashtableImportOptionsBuilder
+{
+  typedef HashtableImportOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit HashtableImportOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableImportOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableImportOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableImportOptions>
+CreateHashtableImportOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  HashtableImportOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct HashtableSizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef HashtableSizeOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct HashtableSizeOptionsBuilder
+{
+  typedef HashtableSizeOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit HashtableSizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<HashtableSizeOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<HashtableSizeOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<HashtableSizeOptions>
+CreateHashtableSizeOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  HashtableSizeOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct VarHandleOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef VarHandleOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_CONTAINER = 4,
+    VT_SHARED_NAME = 6
+  };
+  const flatbuffers::String *container() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_CONTAINER);
+  }
+  const flatbuffers::String *shared_name() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_SHARED_NAME);
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_CONTAINER) &&
+           verifier.VerifyString(container()) && VerifyOffset(verifier, VT_SHARED_NAME) &&
+           verifier.VerifyString(shared_name()) && verifier.EndTable();
+  }
+};
+
+struct VarHandleOptionsBuilder
+{
+  typedef VarHandleOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_container(flatbuffers::Offset<flatbuffers::String> container)
+  {
+    fbb_.AddOffset(VarHandleOptions::VT_CONTAINER, container);
+  }
+  void add_shared_name(flatbuffers::Offset<flatbuffers::String> shared_name)
+  {
+    fbb_.AddOffset(VarHandleOptions::VT_SHARED_NAME, shared_name);
+  }
+  explicit VarHandleOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<VarHandleOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<VarHandleOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                       flatbuffers::Offset<flatbuffers::String> container = 0,
+                       flatbuffers::Offset<flatbuffers::String> shared_name = 0)
+{
+  VarHandleOptionsBuilder builder_(_fbb);
+  builder_.add_shared_name(shared_name);
+  builder_.add_container(container);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<VarHandleOptions>
+CreateVarHandleOptionsDirect(flatbuffers::FlatBufferBuilder &_fbb, const char *container = nullptr,
+                             const char *shared_name = nullptr)
+{
+  auto container__ = container ? _fbb.CreateString(container) : 0;
+  auto shared_name__ = shared_name ? _fbb.CreateString(shared_name) : 0;
+  return onert_tflite::CreateVarHandleOptions(_fbb, container__, shared_name__);
+}
+
+struct ReadVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef ReadVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct ReadVariableOptionsBuilder
+{
+  typedef ReadVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit ReadVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<ReadVariableOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<ReadVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<ReadVariableOptions>
+CreateReadVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  ReadVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct AssignVariableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef AssignVariableOptionsBuilder Builder;
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && verifier.EndTable();
+  }
+};
+
+struct AssignVariableOptionsBuilder
+{
+  typedef AssignVariableOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  explicit AssignVariableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<AssignVariableOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<AssignVariableOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<AssignVariableOptions>
+CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb)
+{
+  AssignVariableOptionsBuilder builder_(_fbb);
+  return builder_.Finish();
+}
+
+struct RandomOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef RandomOptionsBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_SEED = 4,
+    VT_SEED2 = 6
+  };
+  int32_t seed() const { return GetField<int32_t>(VT_SEED, 0); }
+  int32_t seed2() const { return GetField<int32_t>(VT_SEED2, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyField<int32_t>(verifier, VT_SEED) &&
+           VerifyField<int32_t>(verifier, VT_SEED2) && verifier.EndTable();
+  }
+};
+
+struct RandomOptionsBuilder
+{
+  typedef RandomOptions Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_seed(int32_t seed) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED, seed, 0); }
+  void add_seed2(int32_t seed2) { fbb_.AddElement<int32_t>(RandomOptions::VT_SEED2, seed2, 0); }
+  explicit RandomOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<RandomOptions> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<RandomOptions>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<RandomOptions> CreateRandomOptions(flatbuffers::FlatBufferBuilder &_fbb,
+                                                              int32_t seed = 0, int32_t seed2 = 0)
+{
+  RandomOptionsBuilder builder_(_fbb);
+  builder_.add_seed2(seed2);
+  builder_.add_seed(seed);
+  return builder_.Finish();
+}
+
+struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef OperatorCodeBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_DEPRECATED_BUILTIN_CODE = 4,
+    VT_CUSTOM_CODE = 6,
+    VT_VERSION = 8,
+    VT_BUILTIN_CODE = 10
+  };
+  int8_t deprecated_builtin_code() const { return GetField<int8_t>(VT_DEPRECATED_BUILTIN_CODE, 0); }
+  const flatbuffers::String *custom_code() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_CUSTOM_CODE);
+  }
+  int32_t version() const { return GetField<int32_t>(VT_VERSION, 1); }
+  onert_tflite::BuiltinOperator builtin_code() const
+  {
+    return static_cast<onert_tflite::BuiltinOperator>(GetField<int32_t>(VT_BUILTIN_CODE, 0));
+  }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) &&
+           VerifyField<int8_t>(verifier, VT_DEPRECATED_BUILTIN_CODE) &&
+           VerifyOffset(verifier, VT_CUSTOM_CODE) && verifier.VerifyString(custom_code()) &&
+           VerifyField<int32_t>(verifier, VT_VERSION) &&
+           VerifyField<int32_t>(verifier, VT_BUILTIN_CODE) && verifier.EndTable();
+  }
+};
+
+struct OperatorCodeBuilder
+{
+  typedef OperatorCode Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_deprecated_builtin_code(int8_t deprecated_builtin_code)
+  {
+    fbb_.AddElement<int8_t>(OperatorCode::VT_DEPRECATED_BUILTIN_CODE, deprecated_builtin_code, 0);
+  }
+  void add_custom_code(flatbuffers::Offset<flatbuffers::String> custom_code)
+  {
+    fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code);
+  }
+  void add_version(int32_t version)
+  {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_VERSION, version, 1);
+  }
+  void add_builtin_code(onert_tflite::BuiltinOperator builtin_code)
+  {
+    fbb_.AddElement<int32_t>(OperatorCode::VT_BUILTIN_CODE, static_cast<int32_t>(builtin_code), 0);
+  }
+  explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<OperatorCode> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<OperatorCode>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<OperatorCode>
+CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+                   flatbuffers::Offset<flatbuffers::String> custom_code = 0, int32_t version = 1,
+                   onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD)
+{
+  OperatorCodeBuilder builder_(_fbb);
+  builder_.add_builtin_code(builtin_code);
+  builder_.add_version(version);
+  builder_.add_custom_code(custom_code);
+  builder_.add_deprecated_builtin_code(deprecated_builtin_code);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<OperatorCode> CreateOperatorCodeDirect(
+  flatbuffers::FlatBufferBuilder &_fbb, int8_t deprecated_builtin_code = 0,
+  const char *custom_code = nullptr, int32_t version = 1,
+  onert_tflite::BuiltinOperator builtin_code = onert_tflite::BuiltinOperator_ADD)
+{
+  auto custom_code__ = custom_code ? _fbb.CreateString(custom_code) : 0;
+  return onert_tflite::CreateOperatorCode(_fbb, deprecated_builtin_code, custom_code__, version,
+                                          builtin_code);
+}
+
+struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef OperatorBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_OPCODE_INDEX = 4,
     VT_INPUTS = 6,
     VT_OUTPUTS = 8,
     VT_BUILTIN_OPTIONS_TYPE = 10,
@@ -7230,628 +8472,715 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
   }
-  BuiltinOptions builtin_options_type() const
+  onert_tflite::BuiltinOptions builtin_options_type() const
   {
-    return static_cast<BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
+    return static_cast<onert_tflite::BuiltinOptions>(GetField<uint8_t>(VT_BUILTIN_OPTIONS_TYPE, 0));
   }
   const void *builtin_options() const { return GetPointer<const void *>(VT_BUILTIN_OPTIONS); }
   template <typename T> const T *builtin_options_as() const;
-  const Conv2DOptions *builtin_options_as_Conv2DOptions() const
+  const onert_tflite::Conv2DOptions *builtin_options_as_Conv2DOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_Conv2DOptions
-             ? static_cast<const Conv2DOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_Conv2DOptions
+             ? static_cast<const onert_tflite::Conv2DOptions *>(builtin_options())
              : nullptr;
   }
-  const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
+  const onert_tflite::DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions
-             ? static_cast<const DepthwiseConv2DOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_DepthwiseConv2DOptions
+             ? static_cast<const onert_tflite::DepthwiseConv2DOptions *>(builtin_options())
              : nullptr;
   }
-  const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
+  const onert_tflite::ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions
-             ? static_cast<const ConcatEmbeddingsOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatEmbeddingsOptions
+             ? static_cast<const onert_tflite::ConcatEmbeddingsOptions *>(builtin_options())
              : nullptr;
   }
-  const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
+  const onert_tflite::LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LSHProjectionOptions
-             ? static_cast<const LSHProjectionOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LSHProjectionOptions
+             ? static_cast<const onert_tflite::LSHProjectionOptions *>(builtin_options())
              : nullptr;
   }
-  const Pool2DOptions *builtin_options_as_Pool2DOptions() const
+  const onert_tflite::Pool2DOptions *builtin_options_as_Pool2DOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_Pool2DOptions
-             ? static_cast<const Pool2DOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_Pool2DOptions
+             ? static_cast<const onert_tflite::Pool2DOptions *>(builtin_options())
              : nullptr;
   }
-  const SVDFOptions *builtin_options_as_SVDFOptions() const
+  const onert_tflite::SVDFOptions *builtin_options_as_SVDFOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SVDFOptions
-             ? static_cast<const SVDFOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SVDFOptions
+             ? static_cast<const onert_tflite::SVDFOptions *>(builtin_options())
              : nullptr;
   }
-  const RNNOptions *builtin_options_as_RNNOptions() const
+  const onert_tflite::RNNOptions *builtin_options_as_RNNOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_RNNOptions
-             ? static_cast<const RNNOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_RNNOptions
+             ? static_cast<const onert_tflite::RNNOptions *>(builtin_options())
              : nullptr;
   }
-  const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
+  const onert_tflite::FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_FullyConnectedOptions
-             ? static_cast<const FullyConnectedOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_FullyConnectedOptions
+             ? static_cast<const onert_tflite::FullyConnectedOptions *>(builtin_options())
              : nullptr;
   }
-  const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
+  const onert_tflite::SoftmaxOptions *builtin_options_as_SoftmaxOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SoftmaxOptions
-             ? static_cast<const SoftmaxOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SoftmaxOptions
+             ? static_cast<const onert_tflite::SoftmaxOptions *>(builtin_options())
              : nullptr;
   }
-  const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
+  const onert_tflite::ConcatenationOptions *builtin_options_as_ConcatenationOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ConcatenationOptions
-             ? static_cast<const ConcatenationOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ConcatenationOptions
+             ? static_cast<const onert_tflite::ConcatenationOptions *>(builtin_options())
              : nullptr;
   }
-  const AddOptions *builtin_options_as_AddOptions() const
+  const onert_tflite::AddOptions *builtin_options_as_AddOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_AddOptions
-             ? static_cast<const AddOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_AddOptions
+             ? static_cast<const onert_tflite::AddOptions *>(builtin_options())
              : nullptr;
   }
-  const L2NormOptions *builtin_options_as_L2NormOptions() const
+  const onert_tflite::L2NormOptions *builtin_options_as_L2NormOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_L2NormOptions
-             ? static_cast<const L2NormOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_L2NormOptions
+             ? static_cast<const onert_tflite::L2NormOptions *>(builtin_options())
              : nullptr;
   }
-  const LocalResponseNormalizationOptions *
+  const onert_tflite::LocalResponseNormalizationOptions *
   builtin_options_as_LocalResponseNormalizationOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions
-             ? static_cast<const LocalResponseNormalizationOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LocalResponseNormalizationOptions
+             ? static_cast<const onert_tflite::LocalResponseNormalizationOptions *>(
+                 builtin_options())
              : nullptr;
   }
-  const LSTMOptions *builtin_options_as_LSTMOptions() const
+  const onert_tflite::LSTMOptions *builtin_options_as_LSTMOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LSTMOptions
-             ? static_cast<const LSTMOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LSTMOptions
+             ? static_cast<const onert_tflite::LSTMOptions *>(builtin_options())
              : nullptr;
   }
-  const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
+  const onert_tflite::ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions
-             ? static_cast<const ResizeBilinearOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeBilinearOptions
+             ? static_cast<const onert_tflite::ResizeBilinearOptions *>(builtin_options())
              : nullptr;
   }
-  const CallOptions *builtin_options_as_CallOptions() const
+  const onert_tflite::CallOptions *builtin_options_as_CallOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_CallOptions
-             ? static_cast<const CallOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_CallOptions
+             ? static_cast<const onert_tflite::CallOptions *>(builtin_options())
              : nullptr;
   }
-  const ReshapeOptions *builtin_options_as_ReshapeOptions() const
+  const onert_tflite::ReshapeOptions *builtin_options_as_ReshapeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ReshapeOptions
-             ? static_cast<const ReshapeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ReshapeOptions
+             ? static_cast<const onert_tflite::ReshapeOptions *>(builtin_options())
              : nullptr;
   }
-  const SkipGramOptions *builtin_options_as_SkipGramOptions() const
+  const onert_tflite::SkipGramOptions *builtin_options_as_SkipGramOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SkipGramOptions
-             ? static_cast<const SkipGramOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SkipGramOptions
+             ? static_cast<const onert_tflite::SkipGramOptions *>(builtin_options())
              : nullptr;
   }
-  const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
+  const onert_tflite::SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions
-             ? static_cast<const SpaceToDepthOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToDepthOptions
+             ? static_cast<const onert_tflite::SpaceToDepthOptions *>(builtin_options())
              : nullptr;
   }
-  const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const
+  const onert_tflite::EmbeddingLookupSparseOptions *
+  builtin_options_as_EmbeddingLookupSparseOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions
-             ? static_cast<const EmbeddingLookupSparseOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_EmbeddingLookupSparseOptions
+             ? static_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(builtin_options())
              : nullptr;
   }
-  const MulOptions *builtin_options_as_MulOptions() const
+  const onert_tflite::MulOptions *builtin_options_as_MulOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_MulOptions
-             ? static_cast<const MulOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_MulOptions
+             ? static_cast<const onert_tflite::MulOptions *>(builtin_options())
              : nullptr;
   }
-  const PadOptions *builtin_options_as_PadOptions() const
+  const onert_tflite::PadOptions *builtin_options_as_PadOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_PadOptions
-             ? static_cast<const PadOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_PadOptions
+             ? static_cast<const onert_tflite::PadOptions *>(builtin_options())
              : nullptr;
   }
-  const GatherOptions *builtin_options_as_GatherOptions() const
+  const onert_tflite::GatherOptions *builtin_options_as_GatherOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_GatherOptions
-             ? static_cast<const GatherOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_GatherOptions
+             ? static_cast<const onert_tflite::GatherOptions *>(builtin_options())
              : nullptr;
   }
-  const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
+  const onert_tflite::BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions
-             ? static_cast<const BatchToSpaceNDOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_BatchToSpaceNDOptions
+             ? static_cast<const onert_tflite::BatchToSpaceNDOptions *>(builtin_options())
              : nullptr;
   }
-  const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
+  const onert_tflite::SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions
-             ? static_cast<const SpaceToBatchNDOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SpaceToBatchNDOptions
+             ? static_cast<const onert_tflite::SpaceToBatchNDOptions *>(builtin_options())
              : nullptr;
   }
-  const TransposeOptions *builtin_options_as_TransposeOptions() const
+  const onert_tflite::TransposeOptions *builtin_options_as_TransposeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_TransposeOptions
-             ? static_cast<const TransposeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeOptions
+             ? static_cast<const onert_tflite::TransposeOptions *>(builtin_options())
              : nullptr;
   }
-  const ReducerOptions *builtin_options_as_ReducerOptions() const
+  const onert_tflite::ReducerOptions *builtin_options_as_ReducerOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ReducerOptions
-             ? static_cast<const ReducerOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ReducerOptions
+             ? static_cast<const onert_tflite::ReducerOptions *>(builtin_options())
              : nullptr;
   }
-  const SubOptions *builtin_options_as_SubOptions() const
+  const onert_tflite::SubOptions *builtin_options_as_SubOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SubOptions
-             ? static_cast<const SubOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SubOptions
+             ? static_cast<const onert_tflite::SubOptions *>(builtin_options())
              : nullptr;
   }
-  const DivOptions *builtin_options_as_DivOptions() const
+  const onert_tflite::DivOptions *builtin_options_as_DivOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_DivOptions
-             ? static_cast<const DivOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_DivOptions
+             ? static_cast<const onert_tflite::DivOptions *>(builtin_options())
              : nullptr;
   }
-  const SqueezeOptions *builtin_options_as_SqueezeOptions() const
+  const onert_tflite::SqueezeOptions *builtin_options_as_SqueezeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SqueezeOptions
-             ? static_cast<const SqueezeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SqueezeOptions
+             ? static_cast<const onert_tflite::SqueezeOptions *>(builtin_options())
              : nullptr;
   }
-  const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
+  const onert_tflite::SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SequenceRNNOptions
-             ? static_cast<const SequenceRNNOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SequenceRNNOptions
+             ? static_cast<const onert_tflite::SequenceRNNOptions *>(builtin_options())
              : nullptr;
   }
-  const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
+  const onert_tflite::StridedSliceOptions *builtin_options_as_StridedSliceOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_StridedSliceOptions
-             ? static_cast<const StridedSliceOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_StridedSliceOptions
+             ? static_cast<const onert_tflite::StridedSliceOptions *>(builtin_options())
              : nullptr;
   }
-  const ExpOptions *builtin_options_as_ExpOptions() const
+  const onert_tflite::ExpOptions *builtin_options_as_ExpOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ExpOptions
-             ? static_cast<const ExpOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ExpOptions
+             ? static_cast<const onert_tflite::ExpOptions *>(builtin_options())
              : nullptr;
   }
-  const TopKV2Options *builtin_options_as_TopKV2Options() const
+  const onert_tflite::TopKV2Options *builtin_options_as_TopKV2Options() const
   {
-    return builtin_options_type() == BuiltinOptions_TopKV2Options
-             ? static_cast<const TopKV2Options *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_TopKV2Options
+             ? static_cast<const onert_tflite::TopKV2Options *>(builtin_options())
              : nullptr;
   }
-  const SplitOptions *builtin_options_as_SplitOptions() const
+  const onert_tflite::SplitOptions *builtin_options_as_SplitOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SplitOptions
-             ? static_cast<const SplitOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SplitOptions
+             ? static_cast<const onert_tflite::SplitOptions *>(builtin_options())
              : nullptr;
   }
-  const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
+  const onert_tflite::LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions
-             ? static_cast<const LogSoftmaxOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LogSoftmaxOptions
+             ? static_cast<const onert_tflite::LogSoftmaxOptions *>(builtin_options())
              : nullptr;
   }
-  const CastOptions *builtin_options_as_CastOptions() const
+  const onert_tflite::CastOptions *builtin_options_as_CastOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_CastOptions
-             ? static_cast<const CastOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_CastOptions
+             ? static_cast<const onert_tflite::CastOptions *>(builtin_options())
              : nullptr;
   }
-  const DequantizeOptions *builtin_options_as_DequantizeOptions() const
+  const onert_tflite::DequantizeOptions *builtin_options_as_DequantizeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_DequantizeOptions
-             ? static_cast<const DequantizeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_DequantizeOptions
+             ? static_cast<const onert_tflite::DequantizeOptions *>(builtin_options())
              : nullptr;
   }
-  const MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
+  const onert_tflite::MaximumMinimumOptions *builtin_options_as_MaximumMinimumOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_MaximumMinimumOptions
-             ? static_cast<const MaximumMinimumOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_MaximumMinimumOptions
+             ? static_cast<const onert_tflite::MaximumMinimumOptions *>(builtin_options())
              : nullptr;
   }
-  const ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
+  const onert_tflite::ArgMaxOptions *builtin_options_as_ArgMaxOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ArgMaxOptions
-             ? static_cast<const ArgMaxOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMaxOptions
+             ? static_cast<const onert_tflite::ArgMaxOptions *>(builtin_options())
              : nullptr;
   }
-  const LessOptions *builtin_options_as_LessOptions() const
+  const onert_tflite::LessOptions *builtin_options_as_LessOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LessOptions
-             ? static_cast<const LessOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LessOptions
+             ? static_cast<const onert_tflite::LessOptions *>(builtin_options())
              : nullptr;
   }
-  const NegOptions *builtin_options_as_NegOptions() const
+  const onert_tflite::NegOptions *builtin_options_as_NegOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_NegOptions
-             ? static_cast<const NegOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_NegOptions
+             ? static_cast<const onert_tflite::NegOptions *>(builtin_options())
              : nullptr;
   }
-  const PadV2Options *builtin_options_as_PadV2Options() const
+  const onert_tflite::PadV2Options *builtin_options_as_PadV2Options() const
   {
-    return builtin_options_type() == BuiltinOptions_PadV2Options
-             ? static_cast<const PadV2Options *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_PadV2Options
+             ? static_cast<const onert_tflite::PadV2Options *>(builtin_options())
              : nullptr;
   }
-  const GreaterOptions *builtin_options_as_GreaterOptions() const
+  const onert_tflite::GreaterOptions *builtin_options_as_GreaterOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_GreaterOptions
-             ? static_cast<const GreaterOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterOptions
+             ? static_cast<const onert_tflite::GreaterOptions *>(builtin_options())
              : nullptr;
   }
-  const GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
+  const onert_tflite::GreaterEqualOptions *builtin_options_as_GreaterEqualOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_GreaterEqualOptions
-             ? static_cast<const GreaterEqualOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_GreaterEqualOptions
+             ? static_cast<const onert_tflite::GreaterEqualOptions *>(builtin_options())
              : nullptr;
   }
-  const LessEqualOptions *builtin_options_as_LessEqualOptions() const
+  const onert_tflite::LessEqualOptions *builtin_options_as_LessEqualOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LessEqualOptions
-             ? static_cast<const LessEqualOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LessEqualOptions
+             ? static_cast<const onert_tflite::LessEqualOptions *>(builtin_options())
              : nullptr;
   }
-  const SelectOptions *builtin_options_as_SelectOptions() const
+  const onert_tflite::SelectOptions *builtin_options_as_SelectOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SelectOptions
-             ? static_cast<const SelectOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SelectOptions
+             ? static_cast<const onert_tflite::SelectOptions *>(builtin_options())
              : nullptr;
   }
-  const SliceOptions *builtin_options_as_SliceOptions() const
+  const onert_tflite::SliceOptions *builtin_options_as_SliceOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SliceOptions
-             ? static_cast<const SliceOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SliceOptions
+             ? static_cast<const onert_tflite::SliceOptions *>(builtin_options())
              : nullptr;
   }
-  const TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
+  const onert_tflite::TransposeConvOptions *builtin_options_as_TransposeConvOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_TransposeConvOptions
-             ? static_cast<const TransposeConvOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_TransposeConvOptions
+             ? static_cast<const onert_tflite::TransposeConvOptions *>(builtin_options())
              : nullptr;
   }
-  const SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
+  const onert_tflite::SparseToDenseOptions *builtin_options_as_SparseToDenseOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SparseToDenseOptions
-             ? static_cast<const SparseToDenseOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SparseToDenseOptions
+             ? static_cast<const onert_tflite::SparseToDenseOptions *>(builtin_options())
              : nullptr;
   }
-  const TileOptions *builtin_options_as_TileOptions() const
+  const onert_tflite::TileOptions *builtin_options_as_TileOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_TileOptions
-             ? static_cast<const TileOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_TileOptions
+             ? static_cast<const onert_tflite::TileOptions *>(builtin_options())
              : nullptr;
   }
-  const ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
+  const onert_tflite::ExpandDimsOptions *builtin_options_as_ExpandDimsOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ExpandDimsOptions
-             ? static_cast<const ExpandDimsOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ExpandDimsOptions
+             ? static_cast<const onert_tflite::ExpandDimsOptions *>(builtin_options())
              : nullptr;
   }
-  const EqualOptions *builtin_options_as_EqualOptions() const
+  const onert_tflite::EqualOptions *builtin_options_as_EqualOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_EqualOptions
-             ? static_cast<const EqualOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_EqualOptions
+             ? static_cast<const onert_tflite::EqualOptions *>(builtin_options())
              : nullptr;
   }
-  const NotEqualOptions *builtin_options_as_NotEqualOptions() const
+  const onert_tflite::NotEqualOptions *builtin_options_as_NotEqualOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_NotEqualOptions
-             ? static_cast<const NotEqualOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_NotEqualOptions
+             ? static_cast<const onert_tflite::NotEqualOptions *>(builtin_options())
              : nullptr;
   }
-  const ShapeOptions *builtin_options_as_ShapeOptions() const
+  const onert_tflite::ShapeOptions *builtin_options_as_ShapeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ShapeOptions
-             ? static_cast<const ShapeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ShapeOptions
+             ? static_cast<const onert_tflite::ShapeOptions *>(builtin_options())
              : nullptr;
   }
-  const PowOptions *builtin_options_as_PowOptions() const
+  const onert_tflite::PowOptions *builtin_options_as_PowOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_PowOptions
-             ? static_cast<const PowOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_PowOptions
+             ? static_cast<const onert_tflite::PowOptions *>(builtin_options())
              : nullptr;
   }
-  const ArgMinOptions *builtin_options_as_ArgMinOptions() const
+  const onert_tflite::ArgMinOptions *builtin_options_as_ArgMinOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ArgMinOptions
-             ? static_cast<const ArgMinOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ArgMinOptions
+             ? static_cast<const onert_tflite::ArgMinOptions *>(builtin_options())
              : nullptr;
   }
-  const FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
+  const onert_tflite::FakeQuantOptions *builtin_options_as_FakeQuantOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_FakeQuantOptions
-             ? static_cast<const FakeQuantOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_FakeQuantOptions
+             ? static_cast<const onert_tflite::FakeQuantOptions *>(builtin_options())
              : nullptr;
   }
-  const PackOptions *builtin_options_as_PackOptions() const
+  const onert_tflite::PackOptions *builtin_options_as_PackOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_PackOptions
-             ? static_cast<const PackOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_PackOptions
+             ? static_cast<const onert_tflite::PackOptions *>(builtin_options())
              : nullptr;
   }
-  const LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
+  const onert_tflite::LogicalOrOptions *builtin_options_as_LogicalOrOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LogicalOrOptions
-             ? static_cast<const LogicalOrOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalOrOptions
+             ? static_cast<const onert_tflite::LogicalOrOptions *>(builtin_options())
              : nullptr;
   }
-  const OneHotOptions *builtin_options_as_OneHotOptions() const
+  const onert_tflite::OneHotOptions *builtin_options_as_OneHotOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_OneHotOptions
-             ? static_cast<const OneHotOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_OneHotOptions
+             ? static_cast<const onert_tflite::OneHotOptions *>(builtin_options())
              : nullptr;
   }
-  const LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
+  const onert_tflite::LogicalAndOptions *builtin_options_as_LogicalAndOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LogicalAndOptions
-             ? static_cast<const LogicalAndOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalAndOptions
+             ? static_cast<const onert_tflite::LogicalAndOptions *>(builtin_options())
              : nullptr;
   }
-  const LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
+  const onert_tflite::LogicalNotOptions *builtin_options_as_LogicalNotOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LogicalNotOptions
-             ? static_cast<const LogicalNotOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LogicalNotOptions
+             ? static_cast<const onert_tflite::LogicalNotOptions *>(builtin_options())
              : nullptr;
   }
-  const UnpackOptions *builtin_options_as_UnpackOptions() const
+  const onert_tflite::UnpackOptions *builtin_options_as_UnpackOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_UnpackOptions
-             ? static_cast<const UnpackOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_UnpackOptions
+             ? static_cast<const onert_tflite::UnpackOptions *>(builtin_options())
              : nullptr;
   }
-  const FloorDivOptions *builtin_options_as_FloorDivOptions() const
+  const onert_tflite::FloorDivOptions *builtin_options_as_FloorDivOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_FloorDivOptions
-             ? static_cast<const FloorDivOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_FloorDivOptions
+             ? static_cast<const onert_tflite::FloorDivOptions *>(builtin_options())
              : nullptr;
   }
-  const SquareOptions *builtin_options_as_SquareOptions() const
+  const onert_tflite::SquareOptions *builtin_options_as_SquareOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SquareOptions
-             ? static_cast<const SquareOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SquareOptions
+             ? static_cast<const onert_tflite::SquareOptions *>(builtin_options())
              : nullptr;
   }
-  const ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
+  const onert_tflite::ZerosLikeOptions *builtin_options_as_ZerosLikeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ZerosLikeOptions
-             ? static_cast<const ZerosLikeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ZerosLikeOptions
+             ? static_cast<const onert_tflite::ZerosLikeOptions *>(builtin_options())
              : nullptr;
   }
-  const FillOptions *builtin_options_as_FillOptions() const
+  const onert_tflite::FillOptions *builtin_options_as_FillOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_FillOptions
-             ? static_cast<const FillOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_FillOptions
+             ? static_cast<const onert_tflite::FillOptions *>(builtin_options())
              : nullptr;
   }
-  const BidirectionalSequenceLSTMOptions *
+  const onert_tflite::BidirectionalSequenceLSTMOptions *
   builtin_options_as_BidirectionalSequenceLSTMOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_BidirectionalSequenceLSTMOptions
-             ? static_cast<const BidirectionalSequenceLSTMOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceLSTMOptions
+             ? static_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(
+                 builtin_options())
              : nullptr;
   }
-  const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const
+  const onert_tflite::BidirectionalSequenceRNNOptions *
+  builtin_options_as_BidirectionalSequenceRNNOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions
-             ? static_cast<const BidirectionalSequenceRNNOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_BidirectionalSequenceRNNOptions
+             ? static_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(builtin_options())
              : nullptr;
   }
-  const UnidirectionalSequenceLSTMOptions *
+  const onert_tflite::UnidirectionalSequenceLSTMOptions *
   builtin_options_as_UnidirectionalSequenceLSTMOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions
-             ? static_cast<const UnidirectionalSequenceLSTMOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_UnidirectionalSequenceLSTMOptions
+             ? static_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(
+                 builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::FloorModOptions *builtin_options_as_FloorModOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_FloorModOptions
+             ? static_cast<const onert_tflite::FloorModOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::RangeOptions *builtin_options_as_RangeOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_RangeOptions
+             ? static_cast<const onert_tflite::RangeOptions *>(builtin_options())
              : nullptr;
   }
-  const FloorModOptions *builtin_options_as_FloorModOptions() const
+  const onert_tflite::ResizeNearestNeighborOptions *
+  builtin_options_as_ResizeNearestNeighborOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_FloorModOptions
-             ? static_cast<const FloorModOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ResizeNearestNeighborOptions
+             ? static_cast<const onert_tflite::ResizeNearestNeighborOptions *>(builtin_options())
              : nullptr;
   }
-  const RangeOptions *builtin_options_as_RangeOptions() const
+  const onert_tflite::LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_RangeOptions
-             ? static_cast<const RangeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_LeakyReluOptions
+             ? static_cast<const onert_tflite::LeakyReluOptions *>(builtin_options())
              : nullptr;
   }
-  const ResizeNearestNeighborOptions *builtin_options_as_ResizeNearestNeighborOptions() const
+  const onert_tflite::SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ResizeNearestNeighborOptions
-             ? static_cast<const ResizeNearestNeighborOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SquaredDifferenceOptions
+             ? static_cast<const onert_tflite::SquaredDifferenceOptions *>(builtin_options())
              : nullptr;
   }
-  const LeakyReluOptions *builtin_options_as_LeakyReluOptions() const
+  const onert_tflite::MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_LeakyReluOptions
-             ? static_cast<const LeakyReluOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_MirrorPadOptions
+             ? static_cast<const onert_tflite::MirrorPadOptions *>(builtin_options())
              : nullptr;
   }
-  const SquaredDifferenceOptions *builtin_options_as_SquaredDifferenceOptions() const
+  const onert_tflite::AbsOptions *builtin_options_as_AbsOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SquaredDifferenceOptions
-             ? static_cast<const SquaredDifferenceOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_AbsOptions
+             ? static_cast<const onert_tflite::AbsOptions *>(builtin_options())
              : nullptr;
   }
-  const MirrorPadOptions *builtin_options_as_MirrorPadOptions() const
+  const onert_tflite::SplitVOptions *builtin_options_as_SplitVOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_MirrorPadOptions
-             ? static_cast<const MirrorPadOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SplitVOptions
+             ? static_cast<const onert_tflite::SplitVOptions *>(builtin_options())
              : nullptr;
   }
-  const AbsOptions *builtin_options_as_AbsOptions() const
+  const onert_tflite::UniqueOptions *builtin_options_as_UniqueOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_AbsOptions
-             ? static_cast<const AbsOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_UniqueOptions
+             ? static_cast<const onert_tflite::UniqueOptions *>(builtin_options())
              : nullptr;
   }
-  const SplitVOptions *builtin_options_as_SplitVOptions() const
+  const onert_tflite::ReverseV2Options *builtin_options_as_ReverseV2Options() const
   {
-    return builtin_options_type() == BuiltinOptions_SplitVOptions
-             ? static_cast<const SplitVOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseV2Options
+             ? static_cast<const onert_tflite::ReverseV2Options *>(builtin_options())
              : nullptr;
   }
-  const UniqueOptions *builtin_options_as_UniqueOptions() const
+  const onert_tflite::AddNOptions *builtin_options_as_AddNOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_UniqueOptions
-             ? static_cast<const UniqueOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_AddNOptions
+             ? static_cast<const onert_tflite::AddNOptions *>(builtin_options())
              : nullptr;
   }
-  const ReverseV2Options *builtin_options_as_ReverseV2Options() const
+  const onert_tflite::GatherNdOptions *builtin_options_as_GatherNdOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ReverseV2Options
-             ? static_cast<const ReverseV2Options *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_GatherNdOptions
+             ? static_cast<const onert_tflite::GatherNdOptions *>(builtin_options())
              : nullptr;
   }
-  const AddNOptions *builtin_options_as_AddNOptions() const
+  const onert_tflite::CosOptions *builtin_options_as_CosOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_AddNOptions
-             ? static_cast<const AddNOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_CosOptions
+             ? static_cast<const onert_tflite::CosOptions *>(builtin_options())
              : nullptr;
   }
-  const GatherNdOptions *builtin_options_as_GatherNdOptions() const
+  const onert_tflite::WhereOptions *builtin_options_as_WhereOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_GatherNdOptions
-             ? static_cast<const GatherNdOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_WhereOptions
+             ? static_cast<const onert_tflite::WhereOptions *>(builtin_options())
              : nullptr;
   }
-  const CosOptions *builtin_options_as_CosOptions() const
+  const onert_tflite::RankOptions *builtin_options_as_RankOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_CosOptions
-             ? static_cast<const CosOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_RankOptions
+             ? static_cast<const onert_tflite::RankOptions *>(builtin_options())
              : nullptr;
   }
-  const WhereOptions *builtin_options_as_WhereOptions() const
+  const onert_tflite::ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_WhereOptions
-             ? static_cast<const WhereOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ReverseSequenceOptions
+             ? static_cast<const onert_tflite::ReverseSequenceOptions *>(builtin_options())
              : nullptr;
   }
-  const RankOptions *builtin_options_as_RankOptions() const
+  const onert_tflite::MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_RankOptions
-             ? static_cast<const RankOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixDiagOptions
+             ? static_cast<const onert_tflite::MatrixDiagOptions *>(builtin_options())
              : nullptr;
   }
-  const ReverseSequenceOptions *builtin_options_as_ReverseSequenceOptions() const
+  const onert_tflite::QuantizeOptions *builtin_options_as_QuantizeOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ReverseSequenceOptions
-             ? static_cast<const ReverseSequenceOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_QuantizeOptions
+             ? static_cast<const onert_tflite::QuantizeOptions *>(builtin_options())
              : nullptr;
   }
-  const MatrixDiagOptions *builtin_options_as_MatrixDiagOptions() const
+  const onert_tflite::MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_MatrixDiagOptions
-             ? static_cast<const MatrixDiagOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_MatrixSetDiagOptions
+             ? static_cast<const onert_tflite::MatrixSetDiagOptions *>(builtin_options())
              : nullptr;
   }
-  const QuantizeOptions *builtin_options_as_QuantizeOptions() const
+  const onert_tflite::HardSwishOptions *builtin_options_as_HardSwishOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_QuantizeOptions
-             ? static_cast<const QuantizeOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_HardSwishOptions
+             ? static_cast<const onert_tflite::HardSwishOptions *>(builtin_options())
              : nullptr;
   }
-  const MatrixSetDiagOptions *builtin_options_as_MatrixSetDiagOptions() const
+  const onert_tflite::IfOptions *builtin_options_as_IfOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_MatrixSetDiagOptions
-             ? static_cast<const MatrixSetDiagOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_IfOptions
+             ? static_cast<const onert_tflite::IfOptions *>(builtin_options())
              : nullptr;
   }
-  const HardSwishOptions *builtin_options_as_HardSwishOptions() const
+  const onert_tflite::WhileOptions *builtin_options_as_WhileOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_HardSwishOptions
-             ? static_cast<const HardSwishOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_WhileOptions
+             ? static_cast<const onert_tflite::WhileOptions *>(builtin_options())
              : nullptr;
   }
-  const IfOptions *builtin_options_as_IfOptions() const
+  const onert_tflite::DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_IfOptions
-             ? static_cast<const IfOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_DepthToSpaceOptions
+             ? static_cast<const onert_tflite::DepthToSpaceOptions *>(builtin_options())
              : nullptr;
   }
-  const WhileOptions *builtin_options_as_WhileOptions() const
+  const onert_tflite::NonMaxSuppressionV4Options *
+  builtin_options_as_NonMaxSuppressionV4Options() const
   {
-    return builtin_options_type() == BuiltinOptions_WhileOptions
-             ? static_cast<const WhileOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV4Options
+             ? static_cast<const onert_tflite::NonMaxSuppressionV4Options *>(builtin_options())
              : nullptr;
   }
-  const DepthToSpaceOptions *builtin_options_as_DepthToSpaceOptions() const
+  const onert_tflite::NonMaxSuppressionV5Options *
+  builtin_options_as_NonMaxSuppressionV5Options() const
   {
-    return builtin_options_type() == BuiltinOptions_DepthToSpaceOptions
-             ? static_cast<const DepthToSpaceOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_NonMaxSuppressionV5Options
+             ? static_cast<const onert_tflite::NonMaxSuppressionV5Options *>(builtin_options())
              : nullptr;
   }
-  const NonMaxSuppressionV4Options *builtin_options_as_NonMaxSuppressionV4Options() const
+  const onert_tflite::ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV4Options
-             ? static_cast<const NonMaxSuppressionV4Options *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ScatterNdOptions
+             ? static_cast<const onert_tflite::ScatterNdOptions *>(builtin_options())
              : nullptr;
   }
-  const NonMaxSuppressionV5Options *builtin_options_as_NonMaxSuppressionV5Options() const
+  const onert_tflite::SelectV2Options *builtin_options_as_SelectV2Options() const
   {
-    return builtin_options_type() == BuiltinOptions_NonMaxSuppressionV5Options
-             ? static_cast<const NonMaxSuppressionV5Options *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SelectV2Options
+             ? static_cast<const onert_tflite::SelectV2Options *>(builtin_options())
              : nullptr;
   }
-  const ScatterNdOptions *builtin_options_as_ScatterNdOptions() const
+  const onert_tflite::DensifyOptions *builtin_options_as_DensifyOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_ScatterNdOptions
-             ? static_cast<const ScatterNdOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_DensifyOptions
+             ? static_cast<const onert_tflite::DensifyOptions *>(builtin_options())
              : nullptr;
   }
-  const SelectV2Options *builtin_options_as_SelectV2Options() const
+  const onert_tflite::SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SelectV2Options
-             ? static_cast<const SelectV2Options *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_SegmentSumOptions
+             ? static_cast<const onert_tflite::SegmentSumOptions *>(builtin_options())
              : nullptr;
   }
-  const DensifyOptions *builtin_options_as_DensifyOptions() const
+  const onert_tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_DensifyOptions
-             ? static_cast<const DensifyOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_BatchMatMulOptions
+             ? static_cast<const onert_tflite::BatchMatMulOptions *>(builtin_options())
              : nullptr;
   }
-  const SegmentSumOptions *builtin_options_as_SegmentSumOptions() const
+  const onert_tflite::CumsumOptions *builtin_options_as_CumsumOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_SegmentSumOptions
-             ? static_cast<const SegmentSumOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_CumsumOptions
+             ? static_cast<const onert_tflite::CumsumOptions *>(builtin_options())
              : nullptr;
   }
-  const BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const
+  const onert_tflite::CallOnceOptions *builtin_options_as_CallOnceOptions() const
   {
-    return builtin_options_type() == BuiltinOptions_BatchMatMulOptions
-             ? static_cast<const BatchMatMulOptions *>(builtin_options())
+    return builtin_options_type() == onert_tflite::BuiltinOptions_CallOnceOptions
+             ? static_cast<const onert_tflite::CallOnceOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_BroadcastToOptions
+             ? static_cast<const onert_tflite::BroadcastToOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::Rfft2dOptions *builtin_options_as_Rfft2dOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_Rfft2dOptions
+             ? static_cast<const onert_tflite::Rfft2dOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::Conv3DOptions *builtin_options_as_Conv3DOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_Conv3DOptions
+             ? static_cast<const onert_tflite::Conv3DOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::HashtableOptions *builtin_options_as_HashtableOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableOptions
+             ? static_cast<const onert_tflite::HashtableOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::HashtableFindOptions *builtin_options_as_HashtableFindOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableFindOptions
+             ? static_cast<const onert_tflite::HashtableFindOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::HashtableImportOptions *builtin_options_as_HashtableImportOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableImportOptions
+             ? static_cast<const onert_tflite::HashtableImportOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::HashtableSizeOptions *builtin_options_as_HashtableSizeOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_HashtableSizeOptions
+             ? static_cast<const onert_tflite::HashtableSizeOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::VarHandleOptions *builtin_options_as_VarHandleOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_VarHandleOptions
+             ? static_cast<const onert_tflite::VarHandleOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::ReadVariableOptions *builtin_options_as_ReadVariableOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_ReadVariableOptions
+             ? static_cast<const onert_tflite::ReadVariableOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_AssignVariableOptions
+             ? static_cast<const onert_tflite::AssignVariableOptions *>(builtin_options())
+             : nullptr;
+  }
+  const onert_tflite::RandomOptions *builtin_options_as_RandomOptions() const
+  {
+    return builtin_options_type() == onert_tflite::BuiltinOptions_RandomOptions
+             ? static_cast<const onert_tflite::RandomOptions *>(builtin_options())
              : nullptr;
   }
   const flatbuffers::Vector<uint8_t> *custom_options() const
   {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
-  CustomOptionsFormat custom_options_format() const
+  onert_tflite::CustomOptionsFormat custom_options_format() const
   {
-    return static_cast<CustomOptionsFormat>(GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
+    return static_cast<onert_tflite::CustomOptionsFormat>(
+      GetField<int8_t>(VT_CUSTOM_OPTIONS_FORMAT, 0));
   }
   const flatbuffers::Vector<uint8_t> *mutating_variable_inputs() const
   {
@@ -7878,550 +9207,806 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   }
 };
 
-template <> inline const Conv2DOptions *Operator::builtin_options_as<Conv2DOptions>() const
+template <>
+inline const onert_tflite::Conv2DOptions *
+Operator::builtin_options_as<onert_tflite::Conv2DOptions>() const
 {
   return builtin_options_as_Conv2DOptions();
 }
 
 template <>
-inline const DepthwiseConv2DOptions *Operator::builtin_options_as<DepthwiseConv2DOptions>() const
+inline const onert_tflite::DepthwiseConv2DOptions *
+Operator::builtin_options_as<onert_tflite::DepthwiseConv2DOptions>() const
 {
   return builtin_options_as_DepthwiseConv2DOptions();
 }
 
 template <>
-inline const ConcatEmbeddingsOptions *Operator::builtin_options_as<ConcatEmbeddingsOptions>() const
+inline const onert_tflite::ConcatEmbeddingsOptions *
+Operator::builtin_options_as<onert_tflite::ConcatEmbeddingsOptions>() const
 {
   return builtin_options_as_ConcatEmbeddingsOptions();
 }
 
 template <>
-inline const LSHProjectionOptions *Operator::builtin_options_as<LSHProjectionOptions>() const
+inline const onert_tflite::LSHProjectionOptions *
+Operator::builtin_options_as<onert_tflite::LSHProjectionOptions>() const
 {
   return builtin_options_as_LSHProjectionOptions();
 }
 
-template <> inline const Pool2DOptions *Operator::builtin_options_as<Pool2DOptions>() const
+template <>
+inline const onert_tflite::Pool2DOptions *
+Operator::builtin_options_as<onert_tflite::Pool2DOptions>() const
 {
   return builtin_options_as_Pool2DOptions();
 }
 
-template <> inline const SVDFOptions *Operator::builtin_options_as<SVDFOptions>() const
+template <>
+inline const onert_tflite::SVDFOptions *
+Operator::builtin_options_as<onert_tflite::SVDFOptions>() const
 {
   return builtin_options_as_SVDFOptions();
 }
 
-template <> inline const RNNOptions *Operator::builtin_options_as<RNNOptions>() const
+template <>
+inline const onert_tflite::RNNOptions *
+Operator::builtin_options_as<onert_tflite::RNNOptions>() const
 {
   return builtin_options_as_RNNOptions();
 }
 
 template <>
-inline const FullyConnectedOptions *Operator::builtin_options_as<FullyConnectedOptions>() const
+inline const onert_tflite::FullyConnectedOptions *
+Operator::builtin_options_as<onert_tflite::FullyConnectedOptions>() const
 {
   return builtin_options_as_FullyConnectedOptions();
 }
 
-template <> inline const SoftmaxOptions *Operator::builtin_options_as<SoftmaxOptions>() const
+template <>
+inline const onert_tflite::SoftmaxOptions *
+Operator::builtin_options_as<onert_tflite::SoftmaxOptions>() const
 {
   return builtin_options_as_SoftmaxOptions();
 }
 
 template <>
-inline const ConcatenationOptions *Operator::builtin_options_as<ConcatenationOptions>() const
+inline const onert_tflite::ConcatenationOptions *
+Operator::builtin_options_as<onert_tflite::ConcatenationOptions>() const
 {
   return builtin_options_as_ConcatenationOptions();
 }
 
-template <> inline const AddOptions *Operator::builtin_options_as<AddOptions>() const
+template <>
+inline const onert_tflite::AddOptions *
+Operator::builtin_options_as<onert_tflite::AddOptions>() const
 {
   return builtin_options_as_AddOptions();
 }
 
-template <> inline const L2NormOptions *Operator::builtin_options_as<L2NormOptions>() const
+template <>
+inline const onert_tflite::L2NormOptions *
+Operator::builtin_options_as<onert_tflite::L2NormOptions>() const
 {
   return builtin_options_as_L2NormOptions();
 }
 
 template <>
-inline const LocalResponseNormalizationOptions *
-Operator::builtin_options_as<LocalResponseNormalizationOptions>() const
+inline const onert_tflite::LocalResponseNormalizationOptions *
+Operator::builtin_options_as<onert_tflite::LocalResponseNormalizationOptions>() const
 {
   return builtin_options_as_LocalResponseNormalizationOptions();
 }
 
-template <> inline const LSTMOptions *Operator::builtin_options_as<LSTMOptions>() const
+template <>
+inline const onert_tflite::LSTMOptions *
+Operator::builtin_options_as<onert_tflite::LSTMOptions>() const
 {
   return builtin_options_as_LSTMOptions();
 }
 
 template <>
-inline const ResizeBilinearOptions *Operator::builtin_options_as<ResizeBilinearOptions>() const
+inline const onert_tflite::ResizeBilinearOptions *
+Operator::builtin_options_as<onert_tflite::ResizeBilinearOptions>() const
 {
   return builtin_options_as_ResizeBilinearOptions();
 }
 
-template <> inline const CallOptions *Operator::builtin_options_as<CallOptions>() const
+template <>
+inline const onert_tflite::CallOptions *
+Operator::builtin_options_as<onert_tflite::CallOptions>() const
 {
   return builtin_options_as_CallOptions();
 }
 
-template <> inline const ReshapeOptions *Operator::builtin_options_as<ReshapeOptions>() const
+template <>
+inline const onert_tflite::ReshapeOptions *
+Operator::builtin_options_as<onert_tflite::ReshapeOptions>() const
 {
   return builtin_options_as_ReshapeOptions();
 }
 
-template <> inline const SkipGramOptions *Operator::builtin_options_as<SkipGramOptions>() const
+template <>
+inline const onert_tflite::SkipGramOptions *
+Operator::builtin_options_as<onert_tflite::SkipGramOptions>() const
 {
   return builtin_options_as_SkipGramOptions();
 }
 
 template <>
-inline const SpaceToDepthOptions *Operator::builtin_options_as<SpaceToDepthOptions>() const
+inline const onert_tflite::SpaceToDepthOptions *
+Operator::builtin_options_as<onert_tflite::SpaceToDepthOptions>() const
 {
   return builtin_options_as_SpaceToDepthOptions();
 }
 
 template <>
-inline const EmbeddingLookupSparseOptions *
-Operator::builtin_options_as<EmbeddingLookupSparseOptions>() const
+inline const onert_tflite::EmbeddingLookupSparseOptions *
+Operator::builtin_options_as<onert_tflite::EmbeddingLookupSparseOptions>() const
 {
   return builtin_options_as_EmbeddingLookupSparseOptions();
 }
 
-template <> inline const MulOptions *Operator::builtin_options_as<MulOptions>() const
+template <>
+inline const onert_tflite::MulOptions *
+Operator::builtin_options_as<onert_tflite::MulOptions>() const
 {
   return builtin_options_as_MulOptions();
 }
 
-template <> inline const PadOptions *Operator::builtin_options_as<PadOptions>() const
+template <>
+inline const onert_tflite::PadOptions *
+Operator::builtin_options_as<onert_tflite::PadOptions>() const
 {
   return builtin_options_as_PadOptions();
 }
 
-template <> inline const GatherOptions *Operator::builtin_options_as<GatherOptions>() const
+template <>
+inline const onert_tflite::GatherOptions *
+Operator::builtin_options_as<onert_tflite::GatherOptions>() const
 {
   return builtin_options_as_GatherOptions();
 }
 
 template <>
-inline const BatchToSpaceNDOptions *Operator::builtin_options_as<BatchToSpaceNDOptions>() const
+inline const onert_tflite::BatchToSpaceNDOptions *
+Operator::builtin_options_as<onert_tflite::BatchToSpaceNDOptions>() const
 {
   return builtin_options_as_BatchToSpaceNDOptions();
 }
 
 template <>
-inline const SpaceToBatchNDOptions *Operator::builtin_options_as<SpaceToBatchNDOptions>() const
+inline const onert_tflite::SpaceToBatchNDOptions *
+Operator::builtin_options_as<onert_tflite::SpaceToBatchNDOptions>() const
 {
   return builtin_options_as_SpaceToBatchNDOptions();
 }
 
-template <> inline const TransposeOptions *Operator::builtin_options_as<TransposeOptions>() const
+template <>
+inline const onert_tflite::TransposeOptions *
+Operator::builtin_options_as<onert_tflite::TransposeOptions>() const
 {
   return builtin_options_as_TransposeOptions();
 }
 
-template <> inline const ReducerOptions *Operator::builtin_options_as<ReducerOptions>() const
+template <>
+inline const onert_tflite::ReducerOptions *
+Operator::builtin_options_as<onert_tflite::ReducerOptions>() const
 {
   return builtin_options_as_ReducerOptions();
 }
 
-template <> inline const SubOptions *Operator::builtin_options_as<SubOptions>() const
+template <>
+inline const onert_tflite::SubOptions *
+Operator::builtin_options_as<onert_tflite::SubOptions>() const
 {
   return builtin_options_as_SubOptions();
 }
 
-template <> inline const DivOptions *Operator::builtin_options_as<DivOptions>() const
+template <>
+inline const onert_tflite::DivOptions *
+Operator::builtin_options_as<onert_tflite::DivOptions>() const
 {
   return builtin_options_as_DivOptions();
 }
 
-template <> inline const SqueezeOptions *Operator::builtin_options_as<SqueezeOptions>() const
+template <>
+inline const onert_tflite::SqueezeOptions *
+Operator::builtin_options_as<onert_tflite::SqueezeOptions>() const
 {
   return builtin_options_as_SqueezeOptions();
 }
 
 template <>
-inline const SequenceRNNOptions *Operator::builtin_options_as<SequenceRNNOptions>() const
+inline const onert_tflite::SequenceRNNOptions *
+Operator::builtin_options_as<onert_tflite::SequenceRNNOptions>() const
 {
   return builtin_options_as_SequenceRNNOptions();
 }
 
 template <>
-inline const StridedSliceOptions *Operator::builtin_options_as<StridedSliceOptions>() const
+inline const onert_tflite::StridedSliceOptions *
+Operator::builtin_options_as<onert_tflite::StridedSliceOptions>() const
 {
   return builtin_options_as_StridedSliceOptions();
 }
 
-template <> inline const ExpOptions *Operator::builtin_options_as<ExpOptions>() const
+template <>
+inline const onert_tflite::ExpOptions *
+Operator::builtin_options_as<onert_tflite::ExpOptions>() const
 {
   return builtin_options_as_ExpOptions();
 }
 
-template <> inline const TopKV2Options *Operator::builtin_options_as<TopKV2Options>() const
+template <>
+inline const onert_tflite::TopKV2Options *
+Operator::builtin_options_as<onert_tflite::TopKV2Options>() const
 {
   return builtin_options_as_TopKV2Options();
 }
 
-template <> inline const SplitOptions *Operator::builtin_options_as<SplitOptions>() const
+template <>
+inline const onert_tflite::SplitOptions *
+Operator::builtin_options_as<onert_tflite::SplitOptions>() const
 {
   return builtin_options_as_SplitOptions();
 }
 
-template <> inline const LogSoftmaxOptions *Operator::builtin_options_as<LogSoftmaxOptions>() const
+template <>
+inline const onert_tflite::LogSoftmaxOptions *
+Operator::builtin_options_as<onert_tflite::LogSoftmaxOptions>() const
 {
   return builtin_options_as_LogSoftmaxOptions();
 }
 
-template <> inline const CastOptions *Operator::builtin_options_as<CastOptions>() const
+template <>
+inline const onert_tflite::CastOptions *
+Operator::builtin_options_as<onert_tflite::CastOptions>() const
 {
   return builtin_options_as_CastOptions();
 }
 
-template <> inline const DequantizeOptions *Operator::builtin_options_as<DequantizeOptions>() const
+template <>
+inline const onert_tflite::DequantizeOptions *
+Operator::builtin_options_as<onert_tflite::DequantizeOptions>() const
 {
   return builtin_options_as_DequantizeOptions();
 }
 
 template <>
-inline const MaximumMinimumOptions *Operator::builtin_options_as<MaximumMinimumOptions>() const
+inline const onert_tflite::MaximumMinimumOptions *
+Operator::builtin_options_as<onert_tflite::MaximumMinimumOptions>() const
 {
   return builtin_options_as_MaximumMinimumOptions();
 }
 
-template <> inline const ArgMaxOptions *Operator::builtin_options_as<ArgMaxOptions>() const
+template <>
+inline const onert_tflite::ArgMaxOptions *
+Operator::builtin_options_as<onert_tflite::ArgMaxOptions>() const
 {
   return builtin_options_as_ArgMaxOptions();
 }
 
-template <> inline const LessOptions *Operator::builtin_options_as<LessOptions>() const
+template <>
+inline const onert_tflite::LessOptions *
+Operator::builtin_options_as<onert_tflite::LessOptions>() const
 {
   return builtin_options_as_LessOptions();
 }
 
-template <> inline const NegOptions *Operator::builtin_options_as<NegOptions>() const
+template <>
+inline const onert_tflite::NegOptions *
+Operator::builtin_options_as<onert_tflite::NegOptions>() const
 {
   return builtin_options_as_NegOptions();
 }
 
-template <> inline const PadV2Options *Operator::builtin_options_as<PadV2Options>() const
+template <>
+inline const onert_tflite::PadV2Options *
+Operator::builtin_options_as<onert_tflite::PadV2Options>() const
 {
   return builtin_options_as_PadV2Options();
 }
 
-template <> inline const GreaterOptions *Operator::builtin_options_as<GreaterOptions>() const
+template <>
+inline const onert_tflite::GreaterOptions *
+Operator::builtin_options_as<onert_tflite::GreaterOptions>() const
 {
   return builtin_options_as_GreaterOptions();
 }
 
 template <>
-inline const GreaterEqualOptions *Operator::builtin_options_as<GreaterEqualOptions>() const
+inline const onert_tflite::GreaterEqualOptions *
+Operator::builtin_options_as<onert_tflite::GreaterEqualOptions>() const
 {
   return builtin_options_as_GreaterEqualOptions();
 }
 
-template <> inline const LessEqualOptions *Operator::builtin_options_as<LessEqualOptions>() const
+template <>
+inline const onert_tflite::LessEqualOptions *
+Operator::builtin_options_as<onert_tflite::LessEqualOptions>() const
 {
   return builtin_options_as_LessEqualOptions();
 }
 
-template <> inline const SelectOptions *Operator::builtin_options_as<SelectOptions>() const
+template <>
+inline const onert_tflite::SelectOptions *
+Operator::builtin_options_as<onert_tflite::SelectOptions>() const
 {
   return builtin_options_as_SelectOptions();
 }
 
-template <> inline const SliceOptions *Operator::builtin_options_as<SliceOptions>() const
+template <>
+inline const onert_tflite::SliceOptions *
+Operator::builtin_options_as<onert_tflite::SliceOptions>() const
 {
   return builtin_options_as_SliceOptions();
 }
 
 template <>
-inline const TransposeConvOptions *Operator::builtin_options_as<TransposeConvOptions>() const
+inline const onert_tflite::TransposeConvOptions *
+Operator::builtin_options_as<onert_tflite::TransposeConvOptions>() const
 {
   return builtin_options_as_TransposeConvOptions();
 }
 
 template <>
-inline const SparseToDenseOptions *Operator::builtin_options_as<SparseToDenseOptions>() const
+inline const onert_tflite::SparseToDenseOptions *
+Operator::builtin_options_as<onert_tflite::SparseToDenseOptions>() const
 {
   return builtin_options_as_SparseToDenseOptions();
 }
 
-template <> inline const TileOptions *Operator::builtin_options_as<TileOptions>() const
+template <>
+inline const onert_tflite::TileOptions *
+Operator::builtin_options_as<onert_tflite::TileOptions>() const
 {
   return builtin_options_as_TileOptions();
 }
 
-template <> inline const ExpandDimsOptions *Operator::builtin_options_as<ExpandDimsOptions>() const
+template <>
+inline const onert_tflite::ExpandDimsOptions *
+Operator::builtin_options_as<onert_tflite::ExpandDimsOptions>() const
 {
   return builtin_options_as_ExpandDimsOptions();
 }
 
-template <> inline const EqualOptions *Operator::builtin_options_as<EqualOptions>() const
+template <>
+inline const onert_tflite::EqualOptions *
+Operator::builtin_options_as<onert_tflite::EqualOptions>() const
 {
   return builtin_options_as_EqualOptions();
 }
 
-template <> inline const NotEqualOptions *Operator::builtin_options_as<NotEqualOptions>() const
+template <>
+inline const onert_tflite::NotEqualOptions *
+Operator::builtin_options_as<onert_tflite::NotEqualOptions>() const
 {
   return builtin_options_as_NotEqualOptions();
 }
 
-template <> inline const ShapeOptions *Operator::builtin_options_as<ShapeOptions>() const
+template <>
+inline const onert_tflite::ShapeOptions *
+Operator::builtin_options_as<onert_tflite::ShapeOptions>() const
 {
   return builtin_options_as_ShapeOptions();
 }
 
-template <> inline const PowOptions *Operator::builtin_options_as<PowOptions>() const
+template <>
+inline const onert_tflite::PowOptions *
+Operator::builtin_options_as<onert_tflite::PowOptions>() const
 {
   return builtin_options_as_PowOptions();
 }
 
-template <> inline const ArgMinOptions *Operator::builtin_options_as<ArgMinOptions>() const
+template <>
+inline const onert_tflite::ArgMinOptions *
+Operator::builtin_options_as<onert_tflite::ArgMinOptions>() const
 {
   return builtin_options_as_ArgMinOptions();
 }
 
-template <> inline const FakeQuantOptions *Operator::builtin_options_as<FakeQuantOptions>() const
+template <>
+inline const onert_tflite::FakeQuantOptions *
+Operator::builtin_options_as<onert_tflite::FakeQuantOptions>() const
 {
   return builtin_options_as_FakeQuantOptions();
 }
 
-template <> inline const PackOptions *Operator::builtin_options_as<PackOptions>() const
+template <>
+inline const onert_tflite::PackOptions *
+Operator::builtin_options_as<onert_tflite::PackOptions>() const
 {
   return builtin_options_as_PackOptions();
 }
 
-template <> inline const LogicalOrOptions *Operator::builtin_options_as<LogicalOrOptions>() const
+template <>
+inline const onert_tflite::LogicalOrOptions *
+Operator::builtin_options_as<onert_tflite::LogicalOrOptions>() const
 {
   return builtin_options_as_LogicalOrOptions();
 }
 
-template <> inline const OneHotOptions *Operator::builtin_options_as<OneHotOptions>() const
+template <>
+inline const onert_tflite::OneHotOptions *
+Operator::builtin_options_as<onert_tflite::OneHotOptions>() const
 {
   return builtin_options_as_OneHotOptions();
 }
 
-template <> inline const LogicalAndOptions *Operator::builtin_options_as<LogicalAndOptions>() const
+template <>
+inline const onert_tflite::LogicalAndOptions *
+Operator::builtin_options_as<onert_tflite::LogicalAndOptions>() const
 {
   return builtin_options_as_LogicalAndOptions();
 }
 
-template <> inline const LogicalNotOptions *Operator::builtin_options_as<LogicalNotOptions>() const
+template <>
+inline const onert_tflite::LogicalNotOptions *
+Operator::builtin_options_as<onert_tflite::LogicalNotOptions>() const
 {
   return builtin_options_as_LogicalNotOptions();
 }
 
-template <> inline const UnpackOptions *Operator::builtin_options_as<UnpackOptions>() const
+template <>
+inline const onert_tflite::UnpackOptions *
+Operator::builtin_options_as<onert_tflite::UnpackOptions>() const
 {
   return builtin_options_as_UnpackOptions();
 }
 
-template <> inline const FloorDivOptions *Operator::builtin_options_as<FloorDivOptions>() const
+template <>
+inline const onert_tflite::FloorDivOptions *
+Operator::builtin_options_as<onert_tflite::FloorDivOptions>() const
 {
   return builtin_options_as_FloorDivOptions();
 }
 
-template <> inline const SquareOptions *Operator::builtin_options_as<SquareOptions>() const
+template <>
+inline const onert_tflite::SquareOptions *
+Operator::builtin_options_as<onert_tflite::SquareOptions>() const
 {
   return builtin_options_as_SquareOptions();
 }
 
-template <> inline const ZerosLikeOptions *Operator::builtin_options_as<ZerosLikeOptions>() const
+template <>
+inline const onert_tflite::ZerosLikeOptions *
+Operator::builtin_options_as<onert_tflite::ZerosLikeOptions>() const
 {
   return builtin_options_as_ZerosLikeOptions();
 }
 
-template <> inline const FillOptions *Operator::builtin_options_as<FillOptions>() const
+template <>
+inline const onert_tflite::FillOptions *
+Operator::builtin_options_as<onert_tflite::FillOptions>() const
 {
   return builtin_options_as_FillOptions();
 }
 
 template <>
-inline const BidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<BidirectionalSequenceLSTMOptions>() const
+inline const onert_tflite::BidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<onert_tflite::BidirectionalSequenceLSTMOptions>() const
 {
   return builtin_options_as_BidirectionalSequenceLSTMOptions();
 }
 
 template <>
-inline const BidirectionalSequenceRNNOptions *
-Operator::builtin_options_as<BidirectionalSequenceRNNOptions>() const
+inline const onert_tflite::BidirectionalSequenceRNNOptions *
+Operator::builtin_options_as<onert_tflite::BidirectionalSequenceRNNOptions>() const
 {
   return builtin_options_as_BidirectionalSequenceRNNOptions();
 }
 
 template <>
-inline const UnidirectionalSequenceLSTMOptions *
-Operator::builtin_options_as<UnidirectionalSequenceLSTMOptions>() const
+inline const onert_tflite::UnidirectionalSequenceLSTMOptions *
+Operator::builtin_options_as<onert_tflite::UnidirectionalSequenceLSTMOptions>() const
 {
   return builtin_options_as_UnidirectionalSequenceLSTMOptions();
 }
 
-template <> inline const FloorModOptions *Operator::builtin_options_as<FloorModOptions>() const
+template <>
+inline const onert_tflite::FloorModOptions *
+Operator::builtin_options_as<onert_tflite::FloorModOptions>() const
 {
   return builtin_options_as_FloorModOptions();
 }
 
-template <> inline const RangeOptions *Operator::builtin_options_as<RangeOptions>() const
+template <>
+inline const onert_tflite::RangeOptions *
+Operator::builtin_options_as<onert_tflite::RangeOptions>() const
 {
   return builtin_options_as_RangeOptions();
 }
 
 template <>
-inline const ResizeNearestNeighborOptions *
-Operator::builtin_options_as<ResizeNearestNeighborOptions>() const
+inline const onert_tflite::ResizeNearestNeighborOptions *
+Operator::builtin_options_as<onert_tflite::ResizeNearestNeighborOptions>() const
 {
   return builtin_options_as_ResizeNearestNeighborOptions();
 }
 
-template <> inline const LeakyReluOptions *Operator::builtin_options_as<LeakyReluOptions>() const
+template <>
+inline const onert_tflite::LeakyReluOptions *
+Operator::builtin_options_as<onert_tflite::LeakyReluOptions>() const
 {
   return builtin_options_as_LeakyReluOptions();
 }
 
 template <>
-inline const SquaredDifferenceOptions *
-Operator::builtin_options_as<SquaredDifferenceOptions>() const
+inline const onert_tflite::SquaredDifferenceOptions *
+Operator::builtin_options_as<onert_tflite::SquaredDifferenceOptions>() const
 {
   return builtin_options_as_SquaredDifferenceOptions();
 }
 
-template <> inline const MirrorPadOptions *Operator::builtin_options_as<MirrorPadOptions>() const
+template <>
+inline const onert_tflite::MirrorPadOptions *
+Operator::builtin_options_as<onert_tflite::MirrorPadOptions>() const
 {
   return builtin_options_as_MirrorPadOptions();
 }
 
-template <> inline const AbsOptions *Operator::builtin_options_as<AbsOptions>() const
+template <>
+inline const onert_tflite::AbsOptions *
+Operator::builtin_options_as<onert_tflite::AbsOptions>() const
 {
   return builtin_options_as_AbsOptions();
 }
 
-template <> inline const SplitVOptions *Operator::builtin_options_as<SplitVOptions>() const
+template <>
+inline const onert_tflite::SplitVOptions *
+Operator::builtin_options_as<onert_tflite::SplitVOptions>() const
 {
   return builtin_options_as_SplitVOptions();
 }
 
-template <> inline const UniqueOptions *Operator::builtin_options_as<UniqueOptions>() const
+template <>
+inline const onert_tflite::UniqueOptions *
+Operator::builtin_options_as<onert_tflite::UniqueOptions>() const
 {
   return builtin_options_as_UniqueOptions();
 }
 
-template <> inline const ReverseV2Options *Operator::builtin_options_as<ReverseV2Options>() const
+template <>
+inline const onert_tflite::ReverseV2Options *
+Operator::builtin_options_as<onert_tflite::ReverseV2Options>() const
 {
   return builtin_options_as_ReverseV2Options();
 }
 
-template <> inline const AddNOptions *Operator::builtin_options_as<AddNOptions>() const
+template <>
+inline const onert_tflite::AddNOptions *
+Operator::builtin_options_as<onert_tflite::AddNOptions>() const
 {
   return builtin_options_as_AddNOptions();
 }
 
-template <> inline const GatherNdOptions *Operator::builtin_options_as<GatherNdOptions>() const
+template <>
+inline const onert_tflite::GatherNdOptions *
+Operator::builtin_options_as<onert_tflite::GatherNdOptions>() const
 {
   return builtin_options_as_GatherNdOptions();
 }
 
-template <> inline const CosOptions *Operator::builtin_options_as<CosOptions>() const
+template <>
+inline const onert_tflite::CosOptions *
+Operator::builtin_options_as<onert_tflite::CosOptions>() const
 {
   return builtin_options_as_CosOptions();
 }
 
-template <> inline const WhereOptions *Operator::builtin_options_as<WhereOptions>() const
+template <>
+inline const onert_tflite::WhereOptions *
+Operator::builtin_options_as<onert_tflite::WhereOptions>() const
 {
   return builtin_options_as_WhereOptions();
 }
 
-template <> inline const RankOptions *Operator::builtin_options_as<RankOptions>() const
+template <>
+inline const onert_tflite::RankOptions *
+Operator::builtin_options_as<onert_tflite::RankOptions>() const
 {
   return builtin_options_as_RankOptions();
 }
 
 template <>
-inline const ReverseSequenceOptions *Operator::builtin_options_as<ReverseSequenceOptions>() const
+inline const onert_tflite::ReverseSequenceOptions *
+Operator::builtin_options_as<onert_tflite::ReverseSequenceOptions>() const
 {
   return builtin_options_as_ReverseSequenceOptions();
 }
 
-template <> inline const MatrixDiagOptions *Operator::builtin_options_as<MatrixDiagOptions>() const
+template <>
+inline const onert_tflite::MatrixDiagOptions *
+Operator::builtin_options_as<onert_tflite::MatrixDiagOptions>() const
 {
   return builtin_options_as_MatrixDiagOptions();
 }
 
-template <> inline const QuantizeOptions *Operator::builtin_options_as<QuantizeOptions>() const
+template <>
+inline const onert_tflite::QuantizeOptions *
+Operator::builtin_options_as<onert_tflite::QuantizeOptions>() const
 {
   return builtin_options_as_QuantizeOptions();
 }
 
 template <>
-inline const MatrixSetDiagOptions *Operator::builtin_options_as<MatrixSetDiagOptions>() const
+inline const onert_tflite::MatrixSetDiagOptions *
+Operator::builtin_options_as<onert_tflite::MatrixSetDiagOptions>() const
 {
   return builtin_options_as_MatrixSetDiagOptions();
 }
 
-template <> inline const HardSwishOptions *Operator::builtin_options_as<HardSwishOptions>() const
+template <>
+inline const onert_tflite::HardSwishOptions *
+Operator::builtin_options_as<onert_tflite::HardSwishOptions>() const
 {
   return builtin_options_as_HardSwishOptions();
 }
 
-template <> inline const IfOptions *Operator::builtin_options_as<IfOptions>() const
+template <>
+inline const onert_tflite::IfOptions *Operator::builtin_options_as<onert_tflite::IfOptions>() const
 {
   return builtin_options_as_IfOptions();
 }
 
-template <> inline const WhileOptions *Operator::builtin_options_as<WhileOptions>() const
+template <>
+inline const onert_tflite::WhileOptions *
+Operator::builtin_options_as<onert_tflite::WhileOptions>() const
 {
   return builtin_options_as_WhileOptions();
 }
 
 template <>
-inline const DepthToSpaceOptions *Operator::builtin_options_as<DepthToSpaceOptions>() const
+inline const onert_tflite::DepthToSpaceOptions *
+Operator::builtin_options_as<onert_tflite::DepthToSpaceOptions>() const
 {
   return builtin_options_as_DepthToSpaceOptions();
 }
 
 template <>
-inline const NonMaxSuppressionV4Options *
-Operator::builtin_options_as<NonMaxSuppressionV4Options>() const
+inline const onert_tflite::NonMaxSuppressionV4Options *
+Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV4Options>() const
 {
   return builtin_options_as_NonMaxSuppressionV4Options();
 }
 
 template <>
-inline const NonMaxSuppressionV5Options *
-Operator::builtin_options_as<NonMaxSuppressionV5Options>() const
+inline const onert_tflite::NonMaxSuppressionV5Options *
+Operator::builtin_options_as<onert_tflite::NonMaxSuppressionV5Options>() const
 {
   return builtin_options_as_NonMaxSuppressionV5Options();
 }
 
-template <> inline const ScatterNdOptions *Operator::builtin_options_as<ScatterNdOptions>() const
+template <>
+inline const onert_tflite::ScatterNdOptions *
+Operator::builtin_options_as<onert_tflite::ScatterNdOptions>() const
 {
   return builtin_options_as_ScatterNdOptions();
 }
 
-template <> inline const SelectV2Options *Operator::builtin_options_as<SelectV2Options>() const
+template <>
+inline const onert_tflite::SelectV2Options *
+Operator::builtin_options_as<onert_tflite::SelectV2Options>() const
 {
   return builtin_options_as_SelectV2Options();
 }
 
-template <> inline const DensifyOptions *Operator::builtin_options_as<DensifyOptions>() const
+template <>
+inline const onert_tflite::DensifyOptions *
+Operator::builtin_options_as<onert_tflite::DensifyOptions>() const
 {
   return builtin_options_as_DensifyOptions();
 }
 
-template <> inline const SegmentSumOptions *Operator::builtin_options_as<SegmentSumOptions>() const
+template <>
+inline const onert_tflite::SegmentSumOptions *
+Operator::builtin_options_as<onert_tflite::SegmentSumOptions>() const
 {
   return builtin_options_as_SegmentSumOptions();
 }
 
 template <>
-inline const BatchMatMulOptions *Operator::builtin_options_as<BatchMatMulOptions>() const
+inline const onert_tflite::BatchMatMulOptions *
+Operator::builtin_options_as<onert_tflite::BatchMatMulOptions>() const
 {
   return builtin_options_as_BatchMatMulOptions();
 }
 
+template <>
+inline const onert_tflite::CumsumOptions *
+Operator::builtin_options_as<onert_tflite::CumsumOptions>() const
+{
+  return builtin_options_as_CumsumOptions();
+}
+
+template <>
+inline const onert_tflite::CallOnceOptions *
+Operator::builtin_options_as<onert_tflite::CallOnceOptions>() const
+{
+  return builtin_options_as_CallOnceOptions();
+}
+
+template <>
+inline const onert_tflite::BroadcastToOptions *
+Operator::builtin_options_as<onert_tflite::BroadcastToOptions>() const
+{
+  return builtin_options_as_BroadcastToOptions();
+}
+
+template <>
+inline const onert_tflite::Rfft2dOptions *
+Operator::builtin_options_as<onert_tflite::Rfft2dOptions>() const
+{
+  return builtin_options_as_Rfft2dOptions();
+}
+
+template <>
+inline const onert_tflite::Conv3DOptions *
+Operator::builtin_options_as<onert_tflite::Conv3DOptions>() const
+{
+  return builtin_options_as_Conv3DOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableOptions *
+Operator::builtin_options_as<onert_tflite::HashtableOptions>() const
+{
+  return builtin_options_as_HashtableOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableFindOptions *
+Operator::builtin_options_as<onert_tflite::HashtableFindOptions>() const
+{
+  return builtin_options_as_HashtableFindOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableImportOptions *
+Operator::builtin_options_as<onert_tflite::HashtableImportOptions>() const
+{
+  return builtin_options_as_HashtableImportOptions();
+}
+
+template <>
+inline const onert_tflite::HashtableSizeOptions *
+Operator::builtin_options_as<onert_tflite::HashtableSizeOptions>() const
+{
+  return builtin_options_as_HashtableSizeOptions();
+}
+
+template <>
+inline const onert_tflite::VarHandleOptions *
+Operator::builtin_options_as<onert_tflite::VarHandleOptions>() const
+{
+  return builtin_options_as_VarHandleOptions();
+}
+
+template <>
+inline const onert_tflite::ReadVariableOptions *
+Operator::builtin_options_as<onert_tflite::ReadVariableOptions>() const
+{
+  return builtin_options_as_ReadVariableOptions();
+}
+
+template <>
+inline const onert_tflite::AssignVariableOptions *
+Operator::builtin_options_as<onert_tflite::AssignVariableOptions>() const
+{
+  return builtin_options_as_AssignVariableOptions();
+}
+
+template <>
+inline const onert_tflite::RandomOptions *
+Operator::builtin_options_as<onert_tflite::RandomOptions>() const
+{
+  return builtin_options_as_RandomOptions();
+}
+
 struct OperatorBuilder
 {
+  typedef Operator Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_opcode_index(uint32_t opcode_index)
@@ -8436,7 +10021,7 @@ struct OperatorBuilder
   {
     fbb_.AddOffset(Operator::VT_OUTPUTS, outputs);
   }
-  void add_builtin_options_type(BuiltinOptions builtin_options_type)
+  void add_builtin_options_type(onert_tflite::BuiltinOptions builtin_options_type)
   {
     fbb_.AddElement<uint8_t>(Operator::VT_BUILTIN_OPTIONS_TYPE,
                              static_cast<uint8_t>(builtin_options_type), 0);
@@ -8449,7 +10034,7 @@ struct OperatorBuilder
   {
     fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options);
   }
-  void add_custom_options_format(CustomOptionsFormat custom_options_format)
+  void add_custom_options_format(onert_tflite::CustomOptionsFormat custom_options_format)
   {
     fbb_.AddElement<int8_t>(Operator::VT_CUSTOM_OPTIONS_FORMAT,
                             static_cast<int8_t>(custom_options_format), 0);
@@ -8467,7 +10052,6 @@ struct OperatorBuilder
   {
     start_ = fbb_.StartTable();
   }
-  OperatorBuilder &operator=(const OperatorBuilder &);
   flatbuffers::Offset<Operator> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8476,16 +10060,17 @@ struct OperatorBuilder
   }
 };
 
-inline flatbuffers::Offset<Operator>
-CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
-               flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
-               flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
-               BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
-               flatbuffers::Offset<void> builtin_options = 0,
-               flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
-               CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
-               flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
-               flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
+inline flatbuffers::Offset<Operator> CreateOperator(
+  flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+  flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
+  flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
+  onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE,
+  flatbuffers::Offset<void> builtin_options = 0,
+  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom_options = 0,
+  onert_tflite::CustomOptionsFormat custom_options_format =
+    onert_tflite::CustomOptionsFormat_FLEXBUFFERS,
+  flatbuffers::Offset<flatbuffers::Vector<uint8_t>> mutating_variable_inputs = 0,
+  flatbuffers::Offset<flatbuffers::Vector<int32_t>> intermediates = 0)
 {
   OperatorBuilder builder_(_fbb);
   builder_.add_intermediates(intermediates);
@@ -8500,28 +10085,32 @@ CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
   return builder_.Finish();
 }
 
-inline flatbuffers::Offset<Operator>
-CreateOperatorDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
-                     const std::vector<int32_t> *inputs = nullptr,
-                     const std::vector<int32_t> *outputs = nullptr,
-                     BuiltinOptions builtin_options_type = BuiltinOptions_NONE,
-                     flatbuffers::Offset<void> builtin_options = 0,
-                     const std::vector<uint8_t> *custom_options = nullptr,
-                     CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS,
-                     const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
-                     const std::vector<int32_t> *intermediates = nullptr)
-{
-  return onert_tflite::CreateOperator(
-    _fbb, opcode_index, inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
-    outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0, builtin_options_type, builtin_options,
-    custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0, custom_options_format,
-    mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0,
-    intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0);
+inline flatbuffers::Offset<Operator> CreateOperatorDirect(
+  flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0,
+  const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
+  onert_tflite::BuiltinOptions builtin_options_type = onert_tflite::BuiltinOptions_NONE,
+  flatbuffers::Offset<void> builtin_options = 0,
+  const std::vector<uint8_t> *custom_options = nullptr,
+  onert_tflite::CustomOptionsFormat custom_options_format =
+    onert_tflite::CustomOptionsFormat_FLEXBUFFERS,
+  const std::vector<uint8_t> *mutating_variable_inputs = nullptr,
+  const std::vector<int32_t> *intermediates = nullptr)
+{
+  auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+  auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+  auto custom_options__ = custom_options ? _fbb.CreateVector<uint8_t>(*custom_options) : 0;
+  auto mutating_variable_inputs__ =
+    mutating_variable_inputs ? _fbb.CreateVector<uint8_t>(*mutating_variable_inputs) : 0;
+  auto intermediates__ = intermediates ? _fbb.CreateVector<int32_t>(*intermediates) : 0;
+  return onert_tflite::CreateOperator(_fbb, opcode_index, inputs__, outputs__, builtin_options_type,
+                                      builtin_options, custom_options__, custom_options_format,
+                                      mutating_variable_inputs__, intermediates__);
 }
 
 struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef SubGraphBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_TENSORS = 4,
     VT_INPUTS = 6,
@@ -8529,9 +10118,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_OPERATORS = 10,
     VT_NAME = 12
   };
-  const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *tensors() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Tensor>> *>(VT_TENSORS);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>> *>(
+      VT_TENSORS);
   }
   const flatbuffers::Vector<int32_t> *inputs() const
   {
@@ -8541,9 +10131,10 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
   {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_OUTPUTS);
   }
-  const flatbuffers::Vector<flatbuffers::Offset<Operator>> *operators() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *operators() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Operator>> *>(VT_OPERATORS);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>> *>(
+      VT_OPERATORS);
   }
   const flatbuffers::String *name() const
   {
@@ -8563,9 +10154,11 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct SubGraphBuilder
 {
+  typedef SubGraph Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_tensors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors)
+  void add_tensors(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors)
   {
     fbb_.AddOffset(SubGraph::VT_TENSORS, tensors);
   }
@@ -8577,8 +10170,8 @@ struct SubGraphBuilder
   {
     fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs);
   }
-  void
-  add_operators(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators)
+  void add_operators(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators)
   {
     fbb_.AddOffset(SubGraph::VT_OPERATORS, operators);
   }
@@ -8590,7 +10183,6 @@ struct SubGraphBuilder
   {
     start_ = fbb_.StartTable();
   }
-  SubGraphBuilder &operator=(const SubGraphBuilder &);
   flatbuffers::Offset<SubGraph> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8601,10 +10193,11 @@ struct SubGraphBuilder
 
 inline flatbuffers::Offset<SubGraph> CreateSubGraph(
   flatbuffers::FlatBufferBuilder &_fbb,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Tensor>>> tensors = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Tensor>>> tensors = 0,
   flatbuffers::Offset<flatbuffers::Vector<int32_t>> inputs = 0,
   flatbuffers::Offset<flatbuffers::Vector<int32_t>> outputs = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Operator>>> operators = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Operator>>> operators =
+    0,
   flatbuffers::Offset<flatbuffers::String> name = 0)
 {
   SubGraphBuilder builder_(_fbb);
@@ -8618,21 +10211,25 @@ inline flatbuffers::Offset<SubGraph> CreateSubGraph(
 
 inline flatbuffers::Offset<SubGraph> CreateSubGraphDirect(
   flatbuffers::FlatBufferBuilder &_fbb,
-  const std::vector<flatbuffers::Offset<Tensor>> *tensors = nullptr,
+  const std::vector<flatbuffers::Offset<onert_tflite::Tensor>> *tensors = nullptr,
   const std::vector<int32_t> *inputs = nullptr, const std::vector<int32_t> *outputs = nullptr,
-  const std::vector<flatbuffers::Offset<Operator>> *operators = nullptr, const char *name = nullptr)
+  const std::vector<flatbuffers::Offset<onert_tflite::Operator>> *operators = nullptr,
+  const char *name = nullptr)
 {
-  return onert_tflite::CreateSubGraph(
-    _fbb, tensors ? _fbb.CreateVector<flatbuffers::Offset<Tensor>>(*tensors) : 0,
-    inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0,
-    outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0,
-    operators ? _fbb.CreateVector<flatbuffers::Offset<Operator>>(*operators) : 0,
-    name ? _fbb.CreateString(name) : 0);
+  auto tensors__ =
+    tensors ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Tensor>>(*tensors) : 0;
+  auto inputs__ = inputs ? _fbb.CreateVector<int32_t>(*inputs) : 0;
+  auto outputs__ = outputs ? _fbb.CreateVector<int32_t>(*outputs) : 0;
+  auto operators__ =
+    operators ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Operator>>(*operators) : 0;
+  auto name__ = name ? _fbb.CreateString(name) : 0;
+  return onert_tflite::CreateSubGraph(_fbb, tensors__, inputs__, outputs__, operators__, name__);
 }
 
 struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef BufferBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_DATA = 4
   };
@@ -8649,6 +10246,7 @@ struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct BufferBuilder
 {
+  typedef Buffer Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_data(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> data)
@@ -8659,7 +10257,6 @@ struct BufferBuilder
   {
     start_ = fbb_.StartTable();
   }
-  BufferBuilder &operator=(const BufferBuilder &);
   flatbuffers::Offset<Buffer> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8680,12 +10277,18 @@ CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb,
 inline flatbuffers::Offset<Buffer> CreateBufferDirect(flatbuffers::FlatBufferBuilder &_fbb,
                                                       const std::vector<uint8_t> *data = nullptr)
 {
-  return onert_tflite::CreateBuffer(_fbb, data ? _fbb.CreateVector<uint8_t>(*data) : 0);
+  if (data)
+  {
+    _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16);
+  }
+  auto data__ = data ? _fbb.CreateVector<uint8_t>(*data) : 0;
+  return onert_tflite::CreateBuffer(_fbb, data__);
 }
 
 struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef MetadataBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_NAME = 4,
     VT_BUFFER = 6
@@ -8705,6 +10308,7 @@ struct Metadata FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 
 struct MetadataBuilder
 {
+  typedef Metadata Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_name(flatbuffers::Offset<flatbuffers::String> name)
@@ -8716,7 +10320,6 @@ struct MetadataBuilder
   {
     start_ = fbb_.StartTable();
   }
-  MetadataBuilder &operator=(const MetadataBuilder &);
   flatbuffers::Offset<Metadata> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8739,12 +10342,179 @@ inline flatbuffers::Offset<Metadata> CreateMetadataDirect(flatbuffers::FlatBuffe
                                                           const char *name = nullptr,
                                                           uint32_t buffer = 0)
 {
-  return onert_tflite::CreateMetadata(_fbb, name ? _fbb.CreateString(name) : 0, buffer);
+  auto name__ = name ? _fbb.CreateString(name) : 0;
+  return onert_tflite::CreateMetadata(_fbb, name__, buffer);
+}
+
+struct TensorMap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef TensorMapBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_NAME = 4,
+    VT_TENSOR_INDEX = 6
+  };
+  const flatbuffers::String *name() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_NAME);
+  }
+  uint32_t tensor_index() const { return GetField<uint32_t>(VT_TENSOR_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
+           verifier.VerifyString(name()) && VerifyField<uint32_t>(verifier, VT_TENSOR_INDEX) &&
+           verifier.EndTable();
+  }
+};
+
+struct TensorMapBuilder
+{
+  typedef TensorMap Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_name(flatbuffers::Offset<flatbuffers::String> name)
+  {
+    fbb_.AddOffset(TensorMap::VT_NAME, name);
+  }
+  void add_tensor_index(uint32_t tensor_index)
+  {
+    fbb_.AddElement<uint32_t>(TensorMap::VT_TENSOR_INDEX, tensor_index, 0);
+  }
+  explicit TensorMapBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<TensorMap> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<TensorMap>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<TensorMap>
+CreateTensorMap(flatbuffers::FlatBufferBuilder &_fbb,
+                flatbuffers::Offset<flatbuffers::String> name = 0, uint32_t tensor_index = 0)
+{
+  TensorMapBuilder builder_(_fbb);
+  builder_.add_tensor_index(tensor_index);
+  builder_.add_name(name);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<TensorMap> CreateTensorMapDirect(flatbuffers::FlatBufferBuilder &_fbb,
+                                                            const char *name = nullptr,
+                                                            uint32_t tensor_index = 0)
+{
+  auto name__ = name ? _fbb.CreateString(name) : 0;
+  return onert_tflite::CreateTensorMap(_fbb, name__, tensor_index);
+}
+
+struct SignatureDef FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
+{
+  typedef SignatureDefBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
+  {
+    VT_INPUTS = 4,
+    VT_OUTPUTS = 6,
+    VT_SIGNATURE_KEY = 8,
+    VT_SUBGRAPH_INDEX = 12
+  };
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs() const
+  {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>(
+      VT_INPUTS);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs() const
+  {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>> *>(
+      VT_OUTPUTS);
+  }
+  const flatbuffers::String *signature_key() const
+  {
+    return GetPointer<const flatbuffers::String *>(VT_SIGNATURE_KEY);
+  }
+  uint32_t subgraph_index() const { return GetField<uint32_t>(VT_SUBGRAPH_INDEX, 0); }
+  bool Verify(flatbuffers::Verifier &verifier) const
+  {
+    return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_INPUTS) &&
+           verifier.VerifyVector(inputs()) && verifier.VerifyVectorOfTables(inputs()) &&
+           VerifyOffset(verifier, VT_OUTPUTS) && verifier.VerifyVector(outputs()) &&
+           verifier.VerifyVectorOfTables(outputs()) && VerifyOffset(verifier, VT_SIGNATURE_KEY) &&
+           verifier.VerifyString(signature_key()) &&
+           VerifyField<uint32_t>(verifier, VT_SUBGRAPH_INDEX) && verifier.EndTable();
+  }
+};
+
+struct SignatureDefBuilder
+{
+  typedef SignatureDef Table;
+  flatbuffers::FlatBufferBuilder &fbb_;
+  flatbuffers::uoffset_t start_;
+  void add_inputs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs)
+  {
+    fbb_.AddOffset(SignatureDef::VT_INPUTS, inputs);
+  }
+  void add_outputs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs)
+  {
+    fbb_.AddOffset(SignatureDef::VT_OUTPUTS, outputs);
+  }
+  void add_signature_key(flatbuffers::Offset<flatbuffers::String> signature_key)
+  {
+    fbb_.AddOffset(SignatureDef::VT_SIGNATURE_KEY, signature_key);
+  }
+  void add_subgraph_index(uint32_t subgraph_index)
+  {
+    fbb_.AddElement<uint32_t>(SignatureDef::VT_SUBGRAPH_INDEX, subgraph_index, 0);
+  }
+  explicit SignatureDefBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
+  {
+    start_ = fbb_.StartTable();
+  }
+  flatbuffers::Offset<SignatureDef> Finish()
+  {
+    const auto end = fbb_.EndTable(start_);
+    auto o = flatbuffers::Offset<SignatureDef>(end);
+    return o;
+  }
+};
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDef(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> inputs = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::TensorMap>>> outputs =
+    0,
+  flatbuffers::Offset<flatbuffers::String> signature_key = 0, uint32_t subgraph_index = 0)
+{
+  SignatureDefBuilder builder_(_fbb);
+  builder_.add_subgraph_index(subgraph_index);
+  builder_.add_signature_key(signature_key);
+  builder_.add_outputs(outputs);
+  builder_.add_inputs(inputs);
+  return builder_.Finish();
+}
+
+inline flatbuffers::Offset<SignatureDef> CreateSignatureDefDirect(
+  flatbuffers::FlatBufferBuilder &_fbb,
+  const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *inputs = nullptr,
+  const std::vector<flatbuffers::Offset<onert_tflite::TensorMap>> *outputs = nullptr,
+  const char *signature_key = nullptr, uint32_t subgraph_index = 0)
+{
+  auto inputs__ =
+    inputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*inputs) : 0;
+  auto outputs__ =
+    outputs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::TensorMap>>(*outputs) : 0;
+  auto signature_key__ = signature_key ? _fbb.CreateString(signature_key) : 0;
+  return onert_tflite::CreateSignatureDef(_fbb, inputs__, outputs__, signature_key__,
+                                          subgraph_index);
 }
 
 struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
 {
-  enum
+  typedef ModelBuilder Builder;
+  enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
   {
     VT_VERSION = 4,
     VT_OPERATOR_CODES = 6,
@@ -8752,33 +10522,42 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
     VT_DESCRIPTION = 10,
     VT_BUFFERS = 12,
     VT_METADATA_BUFFER = 14,
-    VT_METADATA = 16
+    VT_METADATA = 16,
+    VT_SIGNATURE_DEFS = 18
   };
   uint32_t version() const { return GetField<uint32_t>(VT_VERSION, 0); }
-  const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *operator_codes() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<OperatorCode>> *>(
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *>(
       VT_OPERATOR_CODES);
   }
-  const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *subgraphs() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<SubGraph>> *>(VT_SUBGRAPHS);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>> *>(
+      VT_SUBGRAPHS);
   }
   const flatbuffers::String *description() const
   {
     return GetPointer<const flatbuffers::String *>(VT_DESCRIPTION);
   }
-  const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *buffers() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Buffer>> *>(VT_BUFFERS);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>> *>(
+      VT_BUFFERS);
   }
   const flatbuffers::Vector<int32_t> *metadata_buffer() const
   {
     return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_METADATA_BUFFER);
   }
-  const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *metadata() const
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata() const
+  {
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>> *>(
+      VT_METADATA);
+  }
+  const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs() const
   {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<Metadata>> *>(VT_METADATA);
+    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *>(
+      VT_SIGNATURE_DEFS);
   }
   bool Verify(flatbuffers::Verifier &verifier) const
   {
@@ -8791,22 +10570,26 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table
            verifier.VerifyVector(buffers()) && verifier.VerifyVectorOfTables(buffers()) &&
            VerifyOffset(verifier, VT_METADATA_BUFFER) && verifier.VerifyVector(metadata_buffer()) &&
            VerifyOffset(verifier, VT_METADATA) && verifier.VerifyVector(metadata()) &&
-           verifier.VerifyVectorOfTables(metadata()) && verifier.EndTable();
+           verifier.VerifyVectorOfTables(metadata()) && VerifyOffset(verifier, VT_SIGNATURE_DEFS) &&
+           verifier.VerifyVector(signature_defs()) &&
+           verifier.VerifyVectorOfTables(signature_defs()) && verifier.EndTable();
   }
 };
 
 struct ModelBuilder
 {
+  typedef Model Table;
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
   void add_version(uint32_t version) { fbb_.AddElement<uint32_t>(Model::VT_VERSION, version, 0); }
   void add_operator_codes(
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes)
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>>
+      operator_codes)
   {
     fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes);
   }
-  void
-  add_subgraphs(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs)
+  void add_subgraphs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs)
   {
     fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs);
   }
@@ -8814,7 +10597,8 @@ struct ModelBuilder
   {
     fbb_.AddOffset(Model::VT_DESCRIPTION, description);
   }
-  void add_buffers(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers)
+  void add_buffers(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers)
   {
     fbb_.AddOffset(Model::VT_BUFFERS, buffers);
   }
@@ -8822,16 +10606,21 @@ struct ModelBuilder
   {
     fbb_.AddOffset(Model::VT_METADATA_BUFFER, metadata_buffer);
   }
-  void
-  add_metadata(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata)
+  void add_metadata(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata)
   {
     fbb_.AddOffset(Model::VT_METADATA, metadata);
   }
+  void add_signature_defs(
+    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>>
+      signature_defs)
+  {
+    fbb_.AddOffset(Model::VT_SIGNATURE_DEFS, signature_defs);
+  }
   explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
   {
     start_ = fbb_.StartTable();
   }
-  ModelBuilder &operator=(const ModelBuilder &);
   flatbuffers::Offset<Model> Finish()
   {
     const auto end = fbb_.EndTable(start_);
@@ -8842,14 +10631,20 @@ struct ModelBuilder
 
 inline flatbuffers::Offset<Model> CreateModel(
   flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<OperatorCode>>> operator_codes = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<SubGraph>>> subgraphs = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::OperatorCode>>>
+    operator_codes = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SubGraph>>> subgraphs =
+    0,
   flatbuffers::Offset<flatbuffers::String> description = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Buffer>>> buffers = 0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Buffer>>> buffers = 0,
   flatbuffers::Offset<flatbuffers::Vector<int32_t>> metadata_buffer = 0,
-  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Metadata>>> metadata = 0)
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::Metadata>>> metadata =
+    0,
+  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<onert_tflite::SignatureDef>>>
+    signature_defs = 0)
 {
   ModelBuilder builder_(_fbb);
+  builder_.add_signature_defs(signature_defs);
   builder_.add_metadata(metadata);
   builder_.add_metadata_buffer(metadata_buffer);
   builder_.add_buffers(buffers);
@@ -8860,23 +10655,34 @@ inline flatbuffers::Offset<Model> CreateModel(
   return builder_.Finish();
 }
 
-inline flatbuffers::Offset<Model>
-CreateModelDirect(flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
-                  const std::vector<flatbuffers::Offset<OperatorCode>> *operator_codes = nullptr,
-                  const std::vector<flatbuffers::Offset<SubGraph>> *subgraphs = nullptr,
-                  const char *description = nullptr,
-                  const std::vector<flatbuffers::Offset<Buffer>> *buffers = nullptr,
-                  const std::vector<int32_t> *metadata_buffer = nullptr,
-                  const std::vector<flatbuffers::Offset<Metadata>> *metadata = nullptr)
-{
-  return onert_tflite::CreateModel(
-    _fbb, version,
-    operator_codes ? _fbb.CreateVector<flatbuffers::Offset<OperatorCode>>(*operator_codes) : 0,
-    subgraphs ? _fbb.CreateVector<flatbuffers::Offset<SubGraph>>(*subgraphs) : 0,
-    description ? _fbb.CreateString(description) : 0,
-    buffers ? _fbb.CreateVector<flatbuffers::Offset<Buffer>>(*buffers) : 0,
-    metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0,
-    metadata ? _fbb.CreateVector<flatbuffers::Offset<Metadata>>(*metadata) : 0);
+inline flatbuffers::Offset<Model> CreateModelDirect(
+  flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0,
+  const std::vector<flatbuffers::Offset<onert_tflite::OperatorCode>> *operator_codes = nullptr,
+  const std::vector<flatbuffers::Offset<onert_tflite::SubGraph>> *subgraphs = nullptr,
+  const char *description = nullptr,
+  const std::vector<flatbuffers::Offset<onert_tflite::Buffer>> *buffers = nullptr,
+  const std::vector<int32_t> *metadata_buffer = nullptr,
+  const std::vector<flatbuffers::Offset<onert_tflite::Metadata>> *metadata = nullptr,
+  const std::vector<flatbuffers::Offset<onert_tflite::SignatureDef>> *signature_defs = nullptr)
+{
+  auto operator_codes__ =
+    operator_codes
+      ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::OperatorCode>>(*operator_codes)
+      : 0;
+  auto subgraphs__ =
+    subgraphs ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SubGraph>>(*subgraphs) : 0;
+  auto description__ = description ? _fbb.CreateString(description) : 0;
+  auto buffers__ =
+    buffers ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Buffer>>(*buffers) : 0;
+  auto metadata_buffer__ = metadata_buffer ? _fbb.CreateVector<int32_t>(*metadata_buffer) : 0;
+  auto metadata__ =
+    metadata ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::Metadata>>(*metadata) : 0;
+  auto signature_defs__ =
+    signature_defs
+      ? _fbb.CreateVector<flatbuffers::Offset<onert_tflite::SignatureDef>>(*signature_defs)
+      : 0;
+  return onert_tflite::CreateModel(_fbb, version, operator_codes__, subgraphs__, description__,
+                                   buffers__, metadata_buffer__, metadata__, signature_defs__);
 }
 
 inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const void *obj,
@@ -8890,11 +10696,11 @@ inline bool VerifyQuantizationDetails(flatbuffers::Verifier &verifier, const voi
     }
     case QuantizationDetails_CustomQuantization:
     {
-      auto ptr = reinterpret_cast<const CustomQuantization *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::CustomQuantization *>(obj);
       return verifier.VerifyTable(ptr);
     }
     default:
-      return false;
+      return true;
   }
 }
 
@@ -8929,21 +10735,21 @@ inline bool VerifySparseIndexVector(flatbuffers::Verifier &verifier, const void
     }
     case SparseIndexVector_Int32Vector:
     {
-      auto ptr = reinterpret_cast<const Int32Vector *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::Int32Vector *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case SparseIndexVector_Uint16Vector:
     {
-      auto ptr = reinterpret_cast<const Uint16Vector *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::Uint16Vector *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case SparseIndexVector_Uint8Vector:
     {
-      auto ptr = reinterpret_cast<const Uint8Vector *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::Uint8Vector *>(obj);
       return verifier.VerifyTable(ptr);
     }
     default:
-      return false;
+      return true;
   }
 }
 
@@ -8977,511 +10783,576 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
     }
     case BuiltinOptions_Conv2DOptions:
     {
-      auto ptr = reinterpret_cast<const Conv2DOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::Conv2DOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_DepthwiseConv2DOptions:
     {
-      auto ptr = reinterpret_cast<const DepthwiseConv2DOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::DepthwiseConv2DOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ConcatEmbeddingsOptions:
     {
-      auto ptr = reinterpret_cast<const ConcatEmbeddingsOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ConcatEmbeddingsOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LSHProjectionOptions:
     {
-      auto ptr = reinterpret_cast<const LSHProjectionOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LSHProjectionOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_Pool2DOptions:
     {
-      auto ptr = reinterpret_cast<const Pool2DOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::Pool2DOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SVDFOptions:
     {
-      auto ptr = reinterpret_cast<const SVDFOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SVDFOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_RNNOptions:
     {
-      auto ptr = reinterpret_cast<const RNNOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::RNNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_FullyConnectedOptions:
     {
-      auto ptr = reinterpret_cast<const FullyConnectedOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::FullyConnectedOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SoftmaxOptions:
     {
-      auto ptr = reinterpret_cast<const SoftmaxOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SoftmaxOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ConcatenationOptions:
     {
-      auto ptr = reinterpret_cast<const ConcatenationOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ConcatenationOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_AddOptions:
     {
-      auto ptr = reinterpret_cast<const AddOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::AddOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_L2NormOptions:
     {
-      auto ptr = reinterpret_cast<const L2NormOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::L2NormOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LocalResponseNormalizationOptions:
     {
-      auto ptr = reinterpret_cast<const LocalResponseNormalizationOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LocalResponseNormalizationOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LSTMOptions:
     {
-      auto ptr = reinterpret_cast<const LSTMOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LSTMOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ResizeBilinearOptions:
     {
-      auto ptr = reinterpret_cast<const ResizeBilinearOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ResizeBilinearOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_CallOptions:
     {
-      auto ptr = reinterpret_cast<const CallOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::CallOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ReshapeOptions:
     {
-      auto ptr = reinterpret_cast<const ReshapeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ReshapeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SkipGramOptions:
     {
-      auto ptr = reinterpret_cast<const SkipGramOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SkipGramOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SpaceToDepthOptions:
     {
-      auto ptr = reinterpret_cast<const SpaceToDepthOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SpaceToDepthOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_EmbeddingLookupSparseOptions:
     {
-      auto ptr = reinterpret_cast<const EmbeddingLookupSparseOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::EmbeddingLookupSparseOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_MulOptions:
     {
-      auto ptr = reinterpret_cast<const MulOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::MulOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_PadOptions:
     {
-      auto ptr = reinterpret_cast<const PadOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::PadOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_GatherOptions:
     {
-      auto ptr = reinterpret_cast<const GatherOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::GatherOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_BatchToSpaceNDOptions:
     {
-      auto ptr = reinterpret_cast<const BatchToSpaceNDOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::BatchToSpaceNDOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SpaceToBatchNDOptions:
     {
-      auto ptr = reinterpret_cast<const SpaceToBatchNDOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SpaceToBatchNDOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_TransposeOptions:
     {
-      auto ptr = reinterpret_cast<const TransposeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::TransposeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ReducerOptions:
     {
-      auto ptr = reinterpret_cast<const ReducerOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ReducerOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SubOptions:
     {
-      auto ptr = reinterpret_cast<const SubOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SubOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_DivOptions:
     {
-      auto ptr = reinterpret_cast<const DivOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::DivOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SqueezeOptions:
     {
-      auto ptr = reinterpret_cast<const SqueezeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SqueezeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SequenceRNNOptions:
     {
-      auto ptr = reinterpret_cast<const SequenceRNNOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SequenceRNNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_StridedSliceOptions:
     {
-      auto ptr = reinterpret_cast<const StridedSliceOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::StridedSliceOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ExpOptions:
     {
-      auto ptr = reinterpret_cast<const ExpOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ExpOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_TopKV2Options:
     {
-      auto ptr = reinterpret_cast<const TopKV2Options *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::TopKV2Options *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SplitOptions:
     {
-      auto ptr = reinterpret_cast<const SplitOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SplitOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LogSoftmaxOptions:
     {
-      auto ptr = reinterpret_cast<const LogSoftmaxOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LogSoftmaxOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_CastOptions:
     {
-      auto ptr = reinterpret_cast<const CastOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::CastOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_DequantizeOptions:
     {
-      auto ptr = reinterpret_cast<const DequantizeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::DequantizeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_MaximumMinimumOptions:
     {
-      auto ptr = reinterpret_cast<const MaximumMinimumOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::MaximumMinimumOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ArgMaxOptions:
     {
-      auto ptr = reinterpret_cast<const ArgMaxOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ArgMaxOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LessOptions:
     {
-      auto ptr = reinterpret_cast<const LessOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LessOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_NegOptions:
     {
-      auto ptr = reinterpret_cast<const NegOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::NegOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_PadV2Options:
     {
-      auto ptr = reinterpret_cast<const PadV2Options *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::PadV2Options *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_GreaterOptions:
     {
-      auto ptr = reinterpret_cast<const GreaterOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::GreaterOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_GreaterEqualOptions:
     {
-      auto ptr = reinterpret_cast<const GreaterEqualOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::GreaterEqualOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LessEqualOptions:
     {
-      auto ptr = reinterpret_cast<const LessEqualOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LessEqualOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SelectOptions:
     {
-      auto ptr = reinterpret_cast<const SelectOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SelectOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SliceOptions:
     {
-      auto ptr = reinterpret_cast<const SliceOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SliceOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_TransposeConvOptions:
     {
-      auto ptr = reinterpret_cast<const TransposeConvOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::TransposeConvOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SparseToDenseOptions:
     {
-      auto ptr = reinterpret_cast<const SparseToDenseOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SparseToDenseOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_TileOptions:
     {
-      auto ptr = reinterpret_cast<const TileOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::TileOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ExpandDimsOptions:
     {
-      auto ptr = reinterpret_cast<const ExpandDimsOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ExpandDimsOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_EqualOptions:
     {
-      auto ptr = reinterpret_cast<const EqualOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::EqualOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_NotEqualOptions:
     {
-      auto ptr = reinterpret_cast<const NotEqualOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::NotEqualOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ShapeOptions:
     {
-      auto ptr = reinterpret_cast<const ShapeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ShapeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_PowOptions:
     {
-      auto ptr = reinterpret_cast<const PowOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::PowOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ArgMinOptions:
     {
-      auto ptr = reinterpret_cast<const ArgMinOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ArgMinOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_FakeQuantOptions:
     {
-      auto ptr = reinterpret_cast<const FakeQuantOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::FakeQuantOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_PackOptions:
     {
-      auto ptr = reinterpret_cast<const PackOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::PackOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LogicalOrOptions:
     {
-      auto ptr = reinterpret_cast<const LogicalOrOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LogicalOrOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_OneHotOptions:
     {
-      auto ptr = reinterpret_cast<const OneHotOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::OneHotOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LogicalAndOptions:
     {
-      auto ptr = reinterpret_cast<const LogicalAndOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LogicalAndOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LogicalNotOptions:
     {
-      auto ptr = reinterpret_cast<const LogicalNotOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LogicalNotOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_UnpackOptions:
     {
-      auto ptr = reinterpret_cast<const UnpackOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::UnpackOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_FloorDivOptions:
     {
-      auto ptr = reinterpret_cast<const FloorDivOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::FloorDivOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SquareOptions:
     {
-      auto ptr = reinterpret_cast<const SquareOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SquareOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ZerosLikeOptions:
     {
-      auto ptr = reinterpret_cast<const ZerosLikeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ZerosLikeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_FillOptions:
     {
-      auto ptr = reinterpret_cast<const FillOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::FillOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_BidirectionalSequenceLSTMOptions:
     {
-      auto ptr = reinterpret_cast<const BidirectionalSequenceLSTMOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceLSTMOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_BidirectionalSequenceRNNOptions:
     {
-      auto ptr = reinterpret_cast<const BidirectionalSequenceRNNOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::BidirectionalSequenceRNNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_UnidirectionalSequenceLSTMOptions:
     {
-      auto ptr = reinterpret_cast<const UnidirectionalSequenceLSTMOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::UnidirectionalSequenceLSTMOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_FloorModOptions:
     {
-      auto ptr = reinterpret_cast<const FloorModOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::FloorModOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_RangeOptions:
     {
-      auto ptr = reinterpret_cast<const RangeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::RangeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ResizeNearestNeighborOptions:
     {
-      auto ptr = reinterpret_cast<const ResizeNearestNeighborOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ResizeNearestNeighborOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_LeakyReluOptions:
     {
-      auto ptr = reinterpret_cast<const LeakyReluOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::LeakyReluOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SquaredDifferenceOptions:
     {
-      auto ptr = reinterpret_cast<const SquaredDifferenceOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SquaredDifferenceOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_MirrorPadOptions:
     {
-      auto ptr = reinterpret_cast<const MirrorPadOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::MirrorPadOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_AbsOptions:
     {
-      auto ptr = reinterpret_cast<const AbsOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::AbsOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SplitVOptions:
     {
-      auto ptr = reinterpret_cast<const SplitVOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SplitVOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_UniqueOptions:
     {
-      auto ptr = reinterpret_cast<const UniqueOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::UniqueOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ReverseV2Options:
     {
-      auto ptr = reinterpret_cast<const ReverseV2Options *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ReverseV2Options *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_AddNOptions:
     {
-      auto ptr = reinterpret_cast<const AddNOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::AddNOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_GatherNdOptions:
     {
-      auto ptr = reinterpret_cast<const GatherNdOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::GatherNdOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_CosOptions:
     {
-      auto ptr = reinterpret_cast<const CosOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::CosOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_WhereOptions:
     {
-      auto ptr = reinterpret_cast<const WhereOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::WhereOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_RankOptions:
     {
-      auto ptr = reinterpret_cast<const RankOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::RankOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ReverseSequenceOptions:
     {
-      auto ptr = reinterpret_cast<const ReverseSequenceOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ReverseSequenceOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_MatrixDiagOptions:
     {
-      auto ptr = reinterpret_cast<const MatrixDiagOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::MatrixDiagOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_QuantizeOptions:
     {
-      auto ptr = reinterpret_cast<const QuantizeOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::QuantizeOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_MatrixSetDiagOptions:
     {
-      auto ptr = reinterpret_cast<const MatrixSetDiagOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::MatrixSetDiagOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_HardSwishOptions:
     {
-      auto ptr = reinterpret_cast<const HardSwishOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::HardSwishOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_IfOptions:
     {
-      auto ptr = reinterpret_cast<const IfOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::IfOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_WhileOptions:
     {
-      auto ptr = reinterpret_cast<const WhileOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::WhileOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_DepthToSpaceOptions:
     {
-      auto ptr = reinterpret_cast<const DepthToSpaceOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::DepthToSpaceOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_NonMaxSuppressionV4Options:
     {
-      auto ptr = reinterpret_cast<const NonMaxSuppressionV4Options *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV4Options *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_NonMaxSuppressionV5Options:
     {
-      auto ptr = reinterpret_cast<const NonMaxSuppressionV5Options *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::NonMaxSuppressionV5Options *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_ScatterNdOptions:
     {
-      auto ptr = reinterpret_cast<const ScatterNdOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::ScatterNdOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SelectV2Options:
     {
-      auto ptr = reinterpret_cast<const SelectV2Options *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SelectV2Options *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_DensifyOptions:
     {
-      auto ptr = reinterpret_cast<const DensifyOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::DensifyOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_SegmentSumOptions:
     {
-      auto ptr = reinterpret_cast<const SegmentSumOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::SegmentSumOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     case BuiltinOptions_BatchMatMulOptions:
     {
-      auto ptr = reinterpret_cast<const BatchMatMulOptions *>(obj);
+      auto ptr = reinterpret_cast<const onert_tflite::BatchMatMulOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CumsumOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::CumsumOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_CallOnceOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::CallOnceOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_BroadcastToOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::BroadcastToOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Rfft2dOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::Rfft2dOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_Conv3DOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::Conv3DOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::HashtableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableFindOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::HashtableFindOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableImportOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::HashtableImportOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_HashtableSizeOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::HashtableSizeOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_VarHandleOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::VarHandleOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_ReadVariableOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::ReadVariableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_AssignVariableOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::AssignVariableOptions *>(obj);
+      return verifier.VerifyTable(ptr);
+    }
+    case BuiltinOptions_RandomOptions:
+    {
+      auto ptr = reinterpret_cast<const onert_tflite::RandomOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
     default:
-      return false;
+      return true;
   }
 }
 
diff --git a/runtime/onert/frontend/trix/CMakeLists.txt b/runtime/onert/frontend/trix/CMakeLists.txt
new file mode 100644 (file)
index 0000000..7a0df4e
--- /dev/null
@@ -0,0 +1,21 @@
+if (NOT BUILD_TRIX_LOADER)
+  return()
+endif ()
+
+nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET)
+if(TRIXEngine_FOUND)
+  list(APPEND SOURCES src/trix_loader.cc)
+else()
+  list(APPEND SOURCES src/trix_loader_dummy.cc)
+endif(TRIXEngine_FOUND)
+
+add_library(trix_loader STATIC ${SOURCES})
+set_target_properties(trix_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(trix_loader PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(trix_loader PRIVATE onert_core)
+target_link_libraries(trix_loader PRIVATE nnfw_common nnfw_coverage)
+
+if(TRIXEngine_FOUND)
+  target_include_directories(trix_loader PUBLIC ${TRIXEngine_INCLUDE_DIR})
+  target_link_libraries(trix_loader PRIVATE trix_engine)
+endif(TRIXEngine_FOUND)
diff --git a/runtime/onert/frontend/trix/include/trix_loader.h b/runtime/onert/frontend/trix/include/trix_loader.h
new file mode 100644 (file)
index 0000000..297d5ec
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRIX_TRIX_LOADER_H__
+#define __TRIX_TRIX_LOADER_H__
+
+#include "ir/Graph.h"
+#include <memory>
+
+namespace onert
+{
+namespace trix_loader
+{
+/**
+ * @throw runtime_error when tvn path is wrong or tvn is invalid
+ */
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename);
+} // namespace trix_loader
+} // namespace onert
+
+#endif // __TRIX_TRIX_LOADER_H__
diff --git a/runtime/onert/frontend/trix/src/trix_loader.cc b/runtime/onert/frontend/trix/src/trix_loader.cc
new file mode 100644 (file)
index 0000000..e2995bb
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trix_loader.h"
+
+#include "ir/Graph.h"
+#include "ir/operation/Bulk.h"
+
+#include <libnpuhost.h>
+#include <npubinfmt.h>
+#include <typedef.h>
+
+namespace onert
+{
+namespace trix_loader
+{
+
+/**
+ * @brief A tvn metadata reader
+ */
+class TrixMetaReader
+{
+public:
+  TrixMetaReader() = default;
+  ~TrixMetaReader() { free(_meta); }
+
+  void init(const char *path);
+  data_layout input_seg_layout(uint32_t n) const { return _meta->input_seg_layout[n]; }
+  data_layout output_seg_layout(uint32_t n) const { return _meta->output_seg_layout[n]; }
+  data_type input_seg_quant_type(uint32_t n) const { return _meta->input_seg_quant_type[n]; }
+  data_type output_seg_quant_type(uint32_t n) const { return _meta->output_seg_quant_type[n]; }
+  float input_seg_quant_scale(uint32_t n) const { return _meta->input_seg_quant_s[n]; }
+  float output_seg_quant_scale(uint32_t n) const { return _meta->output_seg_quant_s[n]; }
+  int32_t input_seg_quant_zp(uint32_t n) { return _meta->input_seg_quant_z[n]; }
+  int32_t output_seg_quant_zp(uint32_t n) { return _meta->output_seg_quant_z[n]; }
+  uint32_t input_seg_num() const { return _meta->input_seg_num; }
+  uint32_t output_seg_num() const { return _meta->output_seg_num; }
+  uint32_t input_seg_dims(uint32_t n, uint32_t axis) const
+  {
+    return _meta->input_seg_dims[n][axis];
+  }
+  uint32_t output_seg_dims(uint32_t n, uint32_t axis) const
+  {
+    return _meta->output_seg_dims[n][axis];
+  }
+
+private:
+  npubin_meta *_meta = nullptr;
+};
+
+void TrixMetaReader::init(const char *path)
+{
+  assert(path);
+  _meta = getNPUmodel_metadata(path, false);
+  if (_meta == nullptr)
+  {
+    throw std::runtime_error("Failed to get TRIV2 model metadata");
+  }
+  if (NPUBIN_VERSION(_meta->magiccode) != 3)
+  {
+    throw std::runtime_error("TRIV2 model metadata version mismatched.");
+  }
+}
+
+class TrixLoader
+{
+public:
+  /**
+   * @brief Construct a new TrixLoader object
+   *
+   * @param subgs reference to the subgraphs container to populate
+   */
+  explicit TrixLoader(std::unique_ptr<ir::Subgraphs> &subgs) : _subgraphs(subgs) {}
+
+  /**
+   * @brief Load a model from file
+   * @param file_path
+   */
+  void loadFromFile(const std::string &file_path);
+
+private:
+  /**
+   * @brief Actually load the model
+   * @throw runtime_error when tvn path is wrong or tvn is invalid
+   */
+  void loadModel();
+  void loadSubgraphs();
+  std::unique_ptr<ir::Graph> loadSubgraph();
+  void loadOperands(ir::Graph &subg);
+  ir::OperandIndex loadOperandFromInput(uint32_t i, ir::Graph &subg);
+  ir::OperandIndex loadOperandFromOutput(uint32_t i, ir::Graph &subg);
+  void loadBulk(ir::Graph &subg);
+  void loadOperationIO(ir::OperandIndexSequence &inputs, ir::OperandIndexSequence &outputs);
+  ir::OperandIndex inputIdxToOperandIdx(uint32_t i) const;
+  ir::OperandIndex outputIdxToOperandIdx(uint32_t i) const;
+  ir::DataType toDataType(const data_type type) const;
+
+private:
+protected:
+  /** path to model (e.g. tvn) */
+  std::string _model_path;
+  /** Reference to the loadable subgraphs */
+  std::unique_ptr<ir::Subgraphs> &_subgraphs;
+  TrixMetaReader _meta;
+};
+
+ir::DataType TrixLoader::toDataType(const data_type type) const
+{
+  switch (type)
+  {
+    case DATA_TYPE_QASYMM8:
+      return ir::DataType::QUANT_UINT8_ASYMM;
+    case DATA_TYPE_QSYMM16:
+      return ir::DataType::QUANT_INT16_SYMM;
+    default:
+      throw std::runtime_error("Unsupported data type from trix model");
+  }
+}
+
+ir::OperandIndex TrixLoader::inputIdxToOperandIdx(uint32_t i) const { return ir::OperandIndex(i); }
+ir::OperandIndex TrixLoader::outputIdxToOperandIdx(uint32_t i) const
+{
+  return ir::OperandIndex(_meta.input_seg_num() + i);
+}
+
+void TrixLoader::loadOperationIO(ir::OperandIndexSequence &inputs,
+                                 ir::OperandIndexSequence &outputs)
+{
+  for (uint32_t i = 0; i < _meta.input_seg_num(); ++i)
+  {
+    inputs.append(inputIdxToOperandIdx(i));
+  }
+
+  for (uint32_t i = 0; i < _meta.output_seg_num(); ++i)
+  {
+    outputs.append(outputIdxToOperandIdx(i));
+  }
+}
+
+void TrixLoader::loadBulk(ir::Graph &subg)
+{
+  ir::operation::Bulk::Param param;
+  param.binary_path = _model_path;
+
+  ir::OperandIndexSequence inputs;
+  ir::OperandIndexSequence outputs;
+
+  loadOperationIO(inputs, outputs);
+
+  std::unique_ptr<ir::operation::Bulk> bulk(new ir::operation::Bulk(inputs, outputs, param));
+  subg.addOperation(std::move(bulk));
+}
+
+ir::OperandIndex TrixLoader::loadOperandFromInput(uint32_t idx, ir::Graph &subg)
+{
+  // Shape
+  ir::Shape shape;
+  for (uint32_t d = 0; d < MAX_RANK; ++d)
+    shape.append(_meta.input_seg_dims(idx, d));
+
+  // TypeInfo
+  ir::TypeInfo type_info(toDataType(_meta.input_seg_quant_type(idx)),
+                         _meta.input_seg_quant_scale(idx), _meta.input_seg_quant_zp(idx));
+
+  // Create operand
+  const auto operand_index = subg.addOperand(shape, type_info);
+  return operand_index;
+}
+
+ir::OperandIndex TrixLoader::loadOperandFromOutput(uint32_t idx, ir::Graph &subg)
+{
+  // Shape
+  ir::Shape shape;
+  for (uint32_t d = 0; d < MAX_RANK; ++d)
+    shape.append(_meta.output_seg_dims(idx, d));
+
+  // TypeInfo
+  ir::TypeInfo type_info(toDataType(_meta.output_seg_quant_type(idx)),
+                         _meta.output_seg_quant_scale(idx), _meta.output_seg_quant_zp(idx));
+
+  // Create operand
+  const auto operand_index = subg.addOperand(shape, type_info);
+  return operand_index;
+}
+
+void TrixLoader::loadOperands(ir::Graph &subg)
+{
+  auto in_num = _meta.input_seg_num();
+  for (uint32_t i = 0; i < in_num; ++i)
+  {
+    loadOperandFromInput(i, subg);
+  }
+  auto out_num = _meta.output_seg_num();
+  for (uint32_t i = 0; i < out_num; ++i)
+  {
+    loadOperandFromOutput(i, subg);
+  }
+}
+
+std::unique_ptr<ir::Graph> TrixLoader::loadSubgraph()
+{
+  auto subg = std::make_unique<ir::Graph>();
+  _meta.init(_model_path.c_str());
+
+  // Load tensors
+  loadOperands(*subg);
+
+  // Set inputs
+  for (uint32_t i = 0; i < _meta.input_seg_num(); ++i)
+  {
+    subg->addInput(inputIdxToOperandIdx(i), "tvn_input" + std::to_string(i));
+  }
+  // Set outputs
+  for (uint32_t i = 0; i < _meta.output_seg_num(); ++i)
+  {
+    subg->addOutput(outputIdxToOperandIdx(i), "tvn_out" + std::to_string(i));
+  }
+  // Create operations
+  loadBulk(*subg);
+
+  // TODO: Only NHWC is supported at the moment.
+  subg->setLayout(ir::Layout::NHWC);
+  subg->verify();
+  return subg;
+}
+
+void TrixLoader::loadSubgraphs()
+{
+  // one subgraph only
+  auto subg = loadSubgraph();
+  _subgraphs->push(ir::SubgraphIndex(0), std::move(subg));
+}
+
+void TrixLoader::loadModel() { loadSubgraphs(); }
+
+void TrixLoader::loadFromFile(const std::string &file_path)
+{
+  // model path will be used to set Bulk param
+  _model_path = file_path;
+  // metadata is initialized from the model path since we are loading from a file
+  _meta.init(_model_path.c_str());
+  loadModel();
+}
+
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &filename)
+{
+  auto subgraphs = std::make_unique<ir::Subgraphs>();
+  TrixLoader loader(subgraphs);
+  loader.loadFromFile(filename);
+  return subgraphs;
+}
+} // namespace trix_loader
+} // namespace onert
diff --git a/runtime/onert/frontend/trix/src/trix_loader_dummy.cc b/runtime/onert/frontend/trix/src/trix_loader_dummy.cc
new file mode 100644 (file)
index 0000000..9fc8e1f
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "trix_loader.h"
+
+// Dummy implementation to avoid build errors on targets that don't have trix_engine
+
+namespace onert
+{
+namespace trix_loader
+{
+std::unique_ptr<ir::Subgraphs> loadModel(const std::string &)
+{
+  auto subgraphs = std::make_unique<ir::Subgraphs>();
+  return subgraphs;
+}
+} // namespace trix_loader
+} // namespace onert
index 279c000ba45f24d67995ce155b26c13147dea6b3..6663b79c0fc6b9bd3e9cbc6d0facce103306bd99 100644 (file)
@@ -3,8 +3,8 @@ model = Model()
 i1 = Input("op1", "TENSOR_FLOAT32", "{2, 2}")
 i2 = Input("op2", "TENSOR_FLOAT32", "{1, 2}")
 
-i3 = Output("op3", "TENSOR_QUANT8_ASYMM", "{2, 2}, 1.f, 0")
-model = model.Operation("EQUAL_EX", i1, i2).To(i3)
+i3 = Output("op3", "TENSOR_BOOL8", "{2, 2}")
+model = model.Operation("EQUAL", i1, i2).To(i3)
 
 # Example 1. Input in operand 0,
 input0 = {i1: # input 0
index 0ffc8fb4436616eaed960010b0867dfc10fdd296..e4e4ba1af4671688f07a803ce476011a5a83a5b9 100644 (file)
@@ -525,6 +525,13 @@ uint32_t CircleGen::addOperatorSquare(const OperatorParams &params)
                                 circle::BuiltinOptions_SquareOptions, options);
 }
 
+uint32_t CircleGen::addOperatorBatchToSpaceND(const OperatorParams &params)
+{
+  auto options = circle::CreateBatchToSpaceNDOptions(_fbb).Union();
+  return addOperatorWithOptions(params, circle::BuiltinOperator_BATCH_TO_SPACE_ND,
+                                circle::BuiltinOptions_BatchToSpaceNDOptions, options);
+}
+
 // NOTE Please add addOperator functions ABOVE this lie
 //
 // %  How to add a new addOperatorXXX fuction
index f6f799668a2535b24ef239ecd5517333b8b2c468..062a8d35a2f352f702896526ad47a2bea766e666 100644 (file)
@@ -146,6 +146,7 @@ public:
   uint32_t addOperatorAveragePool2D(const OperatorParams &params, circle::Padding padding,
                                     int stride_w, int stride_h, int filter_w, int filter_h,
                                     circle::ActivationFunctionType actfn);
+  uint32_t addOperatorBatchToSpaceND(const OperatorParams &params);
   uint32_t addOperatorCast(const OperatorParams &params, circle::TensorType input_type,
                            circle::TensorType output_type);
   uint32_t addOperatorConcatenation(const OperatorParams &params, int axis,
index a17a7e8bad30822692665d6fcd54463b8bd9c7a5..53a3571dbe825043ed78aef186ce6758022c3162 100644 (file)
@@ -308,7 +308,7 @@ TEST_F(GenModelTest, Reshape_without_shape_param)
   CircleGen::Shape new_shape_val{2, 2};
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
   _context->addTestCase(uniformTCD<int32_t>({{1, 2, 3, 4}, new_shape_val}, {{1, 2, 3, 4}}));
-  _context->output_sizes(0, sizeof(i32) * 4);
+  _context->output_sizes(0, sizeof(int32_t) * 4);
   _context->setBackends({"cpu" /* "acl_cl", "acl_neon" does not support dynamic tensor */});
 
   SUCCEED();
diff --git a/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc b/tests/nnfw_api/src/one_op_tests/BatchToSpaceND.cc
new file mode 100644 (file)
index 0000000..3f45543
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_notCrop_1x1)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{4, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+  cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+  cgen.setInputsAndOutputs({in, block}, {out});
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(TestCaseData{}
+                          .addInput<float>({1, 2, 3, 4})
+                          .addInput<int32_t>({2, 2})
+                          .addOutput<float>({1, 2, 3, 4}));
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_notCrop_2x2)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{4, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+  int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+  cgen.addOperatorBatchToSpaceND({{in, block}, {out}});
+  cgen.setInputsAndOutputs({in, block}, {out});
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(
+    TestCaseData{}
+      .addInput<float>({1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16})
+      .addInput<int32_t>({2, 2})
+      .addOutput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}));
+  _context->setBackends({"cpu"});
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, OneOp_BatchToSpaceND_Crop)
+{
+  CircleGen cgen;
+  int in = cgen.addTensor({{8, 1, 3, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{2, 2, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+  int block = cgen.addTensor({{2}, circle::TensorType::TensorType_INT32});
+  int crop = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32});
+  cgen.addOperatorBatchToSpaceND({{in, block, crop}, {out}});
+  cgen.setInputsAndOutputs({in, block, crop}, {out});
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(
+    TestCaseData{}
+      .addInput<float>(
+        {0, 1, 3, 0, 9, 11, 0, 2, 4, 0, 10, 12, 0, 5, 7, 0, 13, 15, 0, 6, 8, 0, 14, 16})
+      .addInput<int32_t>({2, 2})
+      .addInput<int32_t>({0, 0, 2, 0})
+      .addOutput<float>({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}));
+  _context->setBackends({"cpu"});
+  SUCCEED();
+}
index 9f79575f2393641881b4406820eea9105d3d3c19..da890978e1e39adba62d62a18298e2d671fbde7f 100644 (file)
 
 #include "GenModelTest.h"
 
-TEST_F(GenModelTest, OneOp_Equal)
+struct EqualVariationParam
 {
+  TestCaseData tcd;
+  circle::TensorType input_type = circle::TensorType::TensorType_FLOAT32;
+  const std::vector<std::string> backends = {"acl_cl", "acl_neon", "cpu"};
+};
+
+class EqualVariation : public GenModelTest,
+                       public ::testing::WithParamInterface<EqualVariationParam>
+{
+};
+
+// Input shape:
+//   Base: {1, 2, 2, 1}
+//   Broadcast: {1} on one of the two inputs
+// Output shape: {1, 2, 2, 1}
+// Input type: Non-quantization type
+// Output type: BOOL
+// Test with different input types and values
+INSTANTIATE_TEST_SUITE_P(GenModelTest, EqualVariation,
+                         ::testing::Values(
+                           // Float type
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<float>({0.1, 0.3, 0.5, 0.7})
+                                                 .addInput<float>({0.1, 0.2, 0.3, 0.4})
+                                                 .addOutput<bool>({true, false, false, false})},
+                           // Float type - broadcast
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<float>({0.1, 0.3, 0.5, 0.7})
+                                                 .addInput<float>({0.3})
+                                                 .addOutput<bool>({false, true, false, false})},
+                           // Int32 type
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<int32_t>({1, 3, 5, 7})
+                                                 .addInput<int32_t>({1, 2, 3, 4})
+                                                 .addOutput<bool>({true, false, false, false}),
+                                               circle::TensorType::TensorType_INT32},
+                           // Int32 type - broadcast
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<int32_t>({1, 3, 5, 7})
+                                                 .addInput<int32_t>({5})
+                                                 .addOutput<bool>({false, false, true, false}),
+                                               circle::TensorType::TensorType_INT32},
+                           // Int64 type
+                           // NYI: acl backend
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<int64_t>({1, 3, 5, 7})
+                                                 .addInput<int64_t>({1, 2, 3, 4})
+                                                 .addOutput<bool>({true, false, false, false}),
+                                               circle::TensorType::TensorType_INT64,
+                                               {"cpu"}},
+                           // Int64 type - broadcast
+                           // NYI: acl backend
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<int64_t>({1, 3, 5, 7})
+                                                 .addInput<int64_t>({1})
+                                                 .addOutput<bool>({true, false, false, false}),
+                                               circle::TensorType::TensorType_INT64,
+                                               {"cpu"}},
+                           // Bool type
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<bool>({true, true, false, false})
+                                                 .addInput<bool>({true, false, true, false})
+                                                 .addOutput<bool>({true, false, false, true}),
+                                               circle::TensorType::TensorType_BOOL},
+                           // Bool type - broadcast
+                           EqualVariationParam{TestCaseData{}
+                                                 .addInput<bool>({true, true, false, false})
+                                                 .addInput<bool>({true})
+                                                 .addOutput<bool>({true, true, false, false}),
+                                               circle::TensorType::TensorType_BOOL}
+
+                           ));
+
+TEST_P(EqualVariation, Test)
+{
+  auto &param = GetParam();
+
+  auto lhs_data = param.tcd.inputs.at(0);
+  auto rhs_data = param.tcd.inputs.at(1);
+
+  bool broadcast_lhs = false;
+  bool broadcast_rhs = false;
+  if (lhs_data.size() != rhs_data.size())
+  {
+    if (lhs_data.size() < rhs_data.size())
+      broadcast_lhs = true;
+    else
+      broadcast_rhs = true;
+  }
+
   CircleGen cgen;
-  int lhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int rhs = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_BOOL});
+  const auto output_type = circle::TensorType::TensorType_BOOL;
+
+  int lhs = broadcast_lhs ? cgen.addTensor({{1}, param.input_type})
+                          : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+  int rhs = broadcast_rhs ? cgen.addTensor({{1}, param.input_type})
+                          : cgen.addTensor({{1, 2, 2, 1}, param.input_type});
+  int out = cgen.addTensor({{1, 2, 2, 1}, output_type});
   cgen.addOperatorEqual({{lhs, rhs}, {out}});
   cgen.setInputsAndOutputs({lhs, rhs}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
-  _context->addTestCase(TestCaseData{}
-                          .addInput<float>({0.1, 0.3, 0.5, 0.7})
-                          .addInput<float>({0.1, 0.2, 0.3, 0.4})
-                          .addOutput<bool>({true, false, false, false}));
-  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+  _context->addTestCase(param.tcd);
+  _context->setBackends(param.backends);
 
   SUCCEED();
 }
index d13417847ecfa4ee139dcc922515b448ba3f20d2..49070d30d9b6e9360f3b82ef18ab63e247a435dd 100644 (file)
@@ -10,6 +10,7 @@ list(APPEND NNPACKAGE_RUN_SRCS "src/nnpackage_run.cc")
 list(APPEND NNPACKAGE_RUN_SRCS "src/args.cc")
 list(APPEND NNPACKAGE_RUN_SRCS "src/nnfw_util.cc")
 list(APPEND NNPACKAGE_RUN_SRCS "src/randomgen.cc")
+list(APPEND NNPACKAGE_RUN_SRCS "src/rawformatter.cc")
 
 nnfw_find_package(Boost REQUIRED program_options)
 nnfw_find_package(Ruy QUIET)
index c0f937797da20f8dac71955a94314a7821bd3b70..ba034cee30fe32e86460eb30292ab1d07a5521ec 100644 (file)
@@ -201,13 +201,15 @@ void Args::Initialize(void)
     ("dump,d", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_filename = v; }), "Output filename")
     ("load,l", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_filename = v; }), "Input filename")
 #endif
+    ("dump:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _dump_raw_filename = v; }), "Raw Output filename")
+    ("load:raw", po::value<std::string>()->default_value("")->notifier([&](const auto &v) { _load_raw_filename = v; }), "Raw Input filename")
     ("output_sizes", po::value<std::string>()->notifier(process_output_sizes),
         "The output buffer size in JSON 1D array\n"
         "If not given, the model's output sizes are used\n"
         "e.g. '[0, 40, 2, 80]' to set 0th tensor to 40 and 2nd tensor to 80.\n")
     ("num_runs,r", po::value<int>()->default_value(1)->notifier([&](const auto &v) { _num_runs = v; }), "The number of runs")
     ("warmup_runs,w", po::value<int>()->default_value(0)->notifier([&](const auto &v) { _warmup_runs = v; }), "The number of warmup runs")
-    ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(ms) between runs (as default no delay")
+    ("run_delay,t", po::value<int>()->default_value(-1)->notifier([&](const auto &v) { _run_delay = v; }), "Delay time(us) between runs (as default no delay")
     ("gpumem_poll,g", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _gpumem_poll = v; }), "Check gpu memory polling separately")
     ("mem_poll,m", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _mem_poll = v; }), "Check memory polling")
     ("write_report,p", po::value<bool>()->default_value(false)->notifier([&](const auto &v) { _write_report = v; }),
index 11fd0002362260c982b46dfe23ef72fcb1748e0e..82015afcc0f4244569de90e9bf51751a397161c5 100644 (file)
@@ -52,6 +52,8 @@ public:
   const std::string &getLoadFilename(void) const { return _load_filename; }
   WhenToUseH5Shape getWhenToUseH5Shape(void) const { return _when_to_use_h5_shape; }
 #endif
+  const std::string &getDumpRawFilename(void) const { return _dump_raw_filename; }
+  const std::string &getLoadRawFilename(void) const { return _load_raw_filename; }
   const int getNumRuns(void) const { return _num_runs; }
   const int getWarmupRuns(void) const { return _warmup_runs; }
   const int getRunDelay(void) const { return _run_delay; }
@@ -80,6 +82,8 @@ private:
   std::string _load_filename;
   WhenToUseH5Shape _when_to_use_h5_shape = WhenToUseH5Shape::NOT_PROVIDED;
 #endif
+  std::string _dump_raw_filename;
+  std::string _load_raw_filename;
   TensorShapeMap _shape_prepare;
   TensorShapeMap _shape_run;
   int _num_runs;
diff --git a/tests/tools/nnpackage_run/src/formatter.h b/tests/tools/nnpackage_run/src/formatter.h
new file mode 100644 (file)
index 0000000..0dca340
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNPACKAGE_RUN_FORMATTER_H__
+#define __NNPACKAGE_RUN_FORMATTER_H__
+
+#include <string>
+#include <vector>
+
+#include "types.h"
+#include "allocation.h"
+
+struct nnfw_session;
+
+namespace nnpkg_run
+{
+class Formatter
+{
+public:
+  virtual ~Formatter() = default;
+  Formatter(nnfw_session *sess) : session_(sess) {}
+  virtual void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) = 0;
+  virtual void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) = 0;
+  virtual std::vector<TensorShape> readTensorShapes(const std::string &filename)
+  {
+    return std::vector<TensorShape>();
+  };
+
+protected:
+  nnfw_session *session_;
+};
+} // namespace nnpkg_run
+
+#endif // __NNPACKAGE_RUN_FORMATTER_H__
index e207465d4d4285836beeffcee625fa640c4fa8a1..c0e670b1eaaf424fd808d60d510078af16c1a70b 100644 (file)
@@ -143,6 +143,8 @@ void H5Formatter::loadInputs(const std::string &filename, std::vector<Allocation
           else
             throw std::runtime_error("model input type is int8. But h5 data type is different.");
           break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
         default:
           throw std::runtime_error("nnpkg_run can load f32, i32, qasymm8, bool and uint8.");
       }
@@ -234,6 +236,8 @@ void H5Formatter::dumpOutputs(const std::string &filename, std::vector<Allocatio
           data_set.write(outputs[i].data(), H5::PredType::NATIVE_INT8);
           break;
         }
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          throw std::runtime_error("NYI for NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED type");
         default:
           throw std::runtime_error("nnpkg_run can dump f32, i32, qasymm8, bool and uint8.");
       }
index 5c831021b7bb99f05dae4ced8bd5983cd399d5d5..ca2d5a576737a7f1dcc379c5dbdc12ee19517eac 100644 (file)
 #ifndef __NNPACKAGE_RUN_H5FORMATTER_H__
 #define __NNPACKAGE_RUN_H5FORMATTER_H__
 
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
 #include <string>
 #include <vector>
 
-#include "types.h"
-#include "allocation.h"
-
 struct nnfw_session;
 
 namespace nnpkg_run
 {
-class H5Formatter
+class H5Formatter : public Formatter
 {
 public:
-  H5Formatter(nnfw_session *sess) : session_(sess) {}
-  std::vector<TensorShape> readTensorShapes(const std::string &filename);
-  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs);
-  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs);
-
-private:
-  nnfw_session *session_;
+  H5Formatter(nnfw_session *sess) : Formatter(sess) {}
+  std::vector<TensorShape> readTensorShapes(const std::string &filename) override;
+  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
 };
 } // namespace nnpkg_run
 
index a57069bd874f163bf23ef93af878aa55b36cabcb..da98da5e46abe71102bc7dcff14d92f829d5cb60 100644 (file)
@@ -41,6 +41,7 @@ uint64_t bufsize_for(const nnfw_tensorinfo *ti)
     sizeof(uint8_t), /* NNFW_TYPE_TENSOR_UINT8 = 4 */
     sizeof(int64_t), /* NNFW_TYPE_TENSOR_INT64 = 5 */
     sizeof(int8_t),  /* NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6 */
+    sizeof(int16_t), /* NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7 */
   };
   return elmsize[ti->dtype] * num_elems(ti);
 }
index 1fcab512acbb5b184fbeea9e735bba2219006b9e..71d8b597751a58af41fd527e2ba8ad01cc896237 100644 (file)
@@ -24,6 +24,7 @@
 #include "nnfw_util.h"
 #include "nnfw_internal.h"
 #include "randomgen.h"
+#include "rawformatter.h"
 #ifdef RUY_PROFILER
 #include "ruy/profiler/profiler.h"
 #endif
@@ -97,7 +98,7 @@ int main(const int argc, char **argv)
         nnfw_tensorinfo ti;
         NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session, i, &ti));
 
-        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
+        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
         {
           std::cerr << "E: not supported input type" << std::endl;
           exit(-1);
@@ -114,7 +115,7 @@ int main(const int argc, char **argv)
         nnfw_tensorinfo ti;
         NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session, i, &ti));
 
-        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED)
+        if (ti.dtype < NNFW_TYPE_TENSOR_FLOAT32 || ti.dtype > NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED)
         {
           std::cerr << "E: not supported output type" << std::endl;
           exit(-1);
@@ -194,10 +195,15 @@ int main(const int argc, char **argv)
 #if defined(ONERT_HAVE_HDF5) && ONERT_HAVE_HDF5 == 1
     if (!args.getLoadFilename().empty())
       H5Formatter(session).loadInputs(args.getLoadFilename(), inputs);
+    else if (!args.getLoadRawFilename().empty())
+      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
     else
       RandomGenerator(session).generate(inputs);
 #else
-    RandomGenerator(session).generate(inputs);
+    if (!args.getLoadRawFilename().empty())
+      RawFormatter(session).loadInputs(args.getLoadRawFilename(), inputs);
+    else
+      RandomGenerator(session).generate(inputs);
 #endif
 
     // prepare output
@@ -267,6 +273,8 @@ int main(const int argc, char **argv)
     if (!args.getDumpFilename().empty())
       H5Formatter(session).dumpOutputs(args.getDumpFilename(), outputs);
 #endif
+    if (!args.getDumpRawFilename().empty())
+      RawFormatter(session).dumpOutputs(args.getDumpRawFilename(), outputs);
 
     NNPR_ENSURE_STATUS(nnfw_close_session(session));
 
index a1fcf82dca2bafe49474510786079b7401d94938..4789b6b1a0f251e5b641e7aa6afa306fd4a21103 100644 (file)
@@ -61,6 +61,9 @@ void RandomGenerator::generate(std::vector<Allocation> &inputs)
       case NNFW_TYPE_TENSOR_INT64:
         randomData<int64_t>(randgen, inputs[i].data(), num_elems(&ti));
         break;
+      case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+        randomData<int16_t>(randgen, inputs[i].data(), num_elems(&ti));
+        break;
       default:
         std::cerr << "Not supported input type" << std::endl;
         std::exit(-1);
diff --git a/tests/tools/nnpackage_run/src/rawformatter.cc b/tests/tools/nnpackage_run/src/rawformatter.cc
new file mode 100644 (file)
index 0000000..f90018e
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rawformatter.h"
+#include "nnfw.h"
+#include "nnfw_util.h"
+
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+
+namespace nnpkg_run
+{
+void RawFormatter::loadInputs(const std::string &filename, std::vector<Allocation> &inputs)
+{
+  uint32_t num_inputs;
+  NNPR_ENSURE_STATUS(nnfw_input_size(session_, &num_inputs));
+
+  // TODO: Support multiple inputs
+  // Option 1. Get comma-separated input file list like --load:raw in.0,in.1,in.2
+  // Option 2. Get prefix --load:raw out
+  //           Internally access out.0, out.1, out.2, ... out.{N} where N is determined by api.
+  if (num_inputs != 1)
+  {
+    throw std::runtime_error("Only 1 input is supported for raw input");
+  }
+  try
+  {
+    for (uint32_t i = 0; i < num_inputs; ++i)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_input_tensorinfo(session_, i, &ti));
+
+      // allocate memory for data
+      auto bufsz = bufsize_for(&ti);
+      inputs[i].alloc(bufsz);
+
+      std::ifstream file(filename, std::ios::ate | std::ios::binary);
+      auto filesz = file.tellg();
+      if (bufsz != filesz)
+      {
+        throw std::runtime_error("Input Size does not match: " + std::to_string(bufsz) +
+                                 " expected, but " + std::to_string(filesz) + " provided.");
+      }
+      file.seekg(0, std::ios::beg);
+      file.read(reinterpret_cast<char *>(inputs[i].data()), filesz);
+      file.close();
+
+      NNPR_ENSURE_STATUS(nnfw_set_input(session_, i, ti.dtype, inputs[i].data(), bufsz));
+      NNPR_ENSURE_STATUS(nnfw_set_input_layout(session_, i, NNFW_LAYOUT_CHANNELS_LAST));
+    }
+  }
+  catch (const std::exception &e)
+  {
+    std::cerr << e.what() << std::endl;
+    std::exit(-1);
+  }
+};
+
+void RawFormatter::dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs)
+{
+  uint32_t num_outputs;
+  NNPR_ENSURE_STATUS(nnfw_output_size(session_, &num_outputs));
+  // TODO: Support multiple outputs
+  // Available options are same.
+  if (num_outputs != 1)
+  {
+    throw std::runtime_error("Only 1 output is supported for raw output");
+  }
+  try
+  {
+    for (uint32_t i = 0; i < num_outputs; i++)
+    {
+      nnfw_tensorinfo ti;
+      NNPR_ENSURE_STATUS(nnfw_output_tensorinfo(session_, i, &ti));
+      auto bufsz = bufsize_for(&ti);
+
+      std::ofstream file(filename + "." + std::to_string(i), std::ios::out | std::ios::binary);
+      file.write(reinterpret_cast<const char *>(outputs[i].data()), bufsz);
+      file.close();
+      std::cerr << filename + "." + std::to_string(i) + " is generated.\n";
+    }
+  }
+  catch (const std::runtime_error &e)
+  {
+    std::cerr << "Error during dumpOutputs on nnpackage_run : " << e.what() << std::endl;
+    std::exit(-1);
+  }
+}
+} // end of namespace nnpkg_run
diff --git a/tests/tools/nnpackage_run/src/rawformatter.h b/tests/tools/nnpackage_run/src/rawformatter.h
new file mode 100644 (file)
index 0000000..8bfc354
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNPACKAGE_RUN_RAWFORMATTER_H__
+#define __NNPACKAGE_RUN_RAWFORMATTER_H__
+
+#include "allocation.h"
+#include "formatter.h"
+#include "types.h"
+
+#include <string>
+#include <vector>
+
+struct nnfw_session;
+
+namespace nnpkg_run
+{
+class RawFormatter : public Formatter
+{
+public:
+  RawFormatter(nnfw_session *sess) : Formatter(sess) {}
+  void loadInputs(const std::string &filename, std::vector<Allocation> &inputs) override;
+  void dumpOutputs(const std::string &filename, std::vector<Allocation> &outputs) override;
+};
+} // namespace nnpkg_run
+
+#endif // __NNPACKAGE_RUN_RAWFORMATTER_H__
index 65a40b49380dcd5bd95353c1d275ddf402547059..b7422ed3f003212eb58eac3de2317a1817135d15 100644 (file)
@@ -106,6 +106,8 @@ inline size_t sizeOfNnfwType(NNFW_TYPE type)
     case NNFW_TYPE_TENSOR_QUANT8_ASYMM:
     case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
       return 1;
+    case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+      return 2;
     case NNFW_TYPE_TENSOR_FLOAT32:
     case NNFW_TYPE_TENSOR_INT32:
       return 4;
@@ -236,6 +238,8 @@ int main(const int argc, char **argv)
         case NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED:
           randomData<int8_t>(randgen, inputs[i]);
           break;
+        case NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED:
+          randomData<int16_t>(randgen, inputs[i]); break;
         case NNFW_TYPE_TENSOR_FLOAT32:
           randomData<float>(randgen, inputs[i]);
           break;
index fa32c73501983e6a30b78b47a0860e8d3819f22e..f03d523714a7e623da7eb62d096d466e3242ad3e 100755 (executable)
@@ -1,6 +1,4 @@
 #!/usr/bin/env bash
-set -x
-
 usage()
 {
     echo "Usage: $0 [BuildArch] [LinuxCodeName] [--setproxy=IP] [--skipunmount]"
@@ -29,7 +27,7 @@ __UbuntuPackages="build-essential"
 # other development supports
 __UbuntuPackages+=" ocl-icd-opencl-dev"
 __UbuntuPackages+=" libhdf5-dev"
-__UbuntuBoostPackages=" libboost-all-dev"
+__UbuntuPackages+=" libboost-all-dev"
 
 # symlinks fixer
 __UbuntuPackages+=" symlinks"
@@ -77,7 +75,6 @@ for i in "$@" ; do
             ;;
         focal)
             __LinuxCodeName=focal
-            __UbuntuBoostPackages=" libboost1.67-all-dev"
             ;;
         --setproxy*)
             proxyip="${i#*=}"
@@ -95,8 +92,7 @@ for i in "$@" ; do
     esac
 done
 
-# Current runtime build system supports boost version under 1.70
-__UbuntuPackages+="$__UbuntuBoostPackages"
+set -x
 
 __RootfsDir="$__CrossDir/rootfs/$__BuildArch"
 
index 9d4676e232e83c93be879a0d9c6feb40f922ae95..34fd49d8b38c8be35aacb733b5b9dfaa552d8bce 100644 (file)
@@ -1,6 +1,6 @@
 # model2nnpkg
 
-`model2nnpkg` is a tool to convert model (either `tflite` or `circle`) to `nnpackage`.
+`model2nnpkg` is a tool to convert model (e.g. `tflite`, `circle` or `tvn`) to `nnpackage`.
 
 It takes `modelfile` as input and generates `nnpackage`.
 
@@ -8,7 +8,7 @@ It takes `modelfile` as input and generates `nnpackage`.
 
 ```
 Usage: model2nnpkg.sh [options] modelfile
-Convert modelfile (either tflite or circle) to nnpackage.
+Convert modelfile to nnpackage.
 
 Options:
     -h   show this help
index 2b26e4068a14d56f7b26ff06b0a7b8496450e558..9374af737a3d809257ef615bba8948e9f0cddfe9 100755 (executable)
@@ -10,7 +10,7 @@ config_src=""
 
 usage() {
   echo "Usage: $progname [options] modelfile"
-  echo "Convert modelfile (either tflite or circle) to nnpackage."
+  echo "Convert modelfile (tflite, circle or tvn) to nnpackage."
   echo ""
   echo "Options:"
   echo "    -h   show this help"
@@ -78,7 +78,7 @@ fi
 cat > "$outdir"/"$name"/metadata/MANIFEST <<-EOF
 {
   "major-version" : "1",
-  "minor-version" : "1",
+  "minor-version" : "2",
   "patch-version" : "0",
   "configs"     : [ "$config" ],
   "models"      : [ "$modelfile" ],
diff --git a/tools/tflitefile_tool/config_saver.py b/tools/tflitefile_tool/config_saver.py
deleted file mode 100755 (executable)
index abf2c0c..0000000
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_wrapping import Operator
-from tensor_printer import TensorPrinter
-from option_printer import OptionPrinter
-
-
-class ConfigSaver(object):
-    def __init__(self, file_name, operator):
-        self.file_name = file_name
-        self.operator = operator
-        # Set self.verbose to 1 level to print more information
-        self.verbose = 1
-        self.op_idx = operator.operator_idx
-        self.op_name = operator.opcode_str
-
-        self.f = open(file_name, 'at')
-
-    def __del__(self):
-        self.f.close()
-
-    def SaveInfo(self):
-        self.f.write("[{}]\n".format(self.op_idx))
-        if (self.op_name == 'CONV_2D'):
-            self.SaveConv2DInputs()
-        else:
-            self.SaveInputs()
-
-        self.SaveOutputs()
-
-        self.SaveAttributes()
-
-        self.f.write('\n')
-
-    def SaveConv2DInputs(self):
-        if (len(self.operator.inputs) != 3):
-            raise AssertionError('Conv2D input count should be 3')
-
-        inputs = self.operator.inputs[0]
-        weights = self.operator.inputs[1]
-        bias = self.operator.inputs[2]
-
-        self.f.write("input: {}\n".format(
-            TensorPrinter(self.verbose, inputs).GetShapeString()))
-        self.f.write("input_type: {}\n".format(inputs.type_name))
-        self.f.write("weights: {}\n".format(
-            TensorPrinter(self.verbose, weights).GetShapeString()))
-        self.f.write("weights_type: {}\n".format(weights.type_name))
-        self.f.write("bias: {}\n".format(
-            TensorPrinter(self.verbose, bias).GetShapeString()))
-        self.f.write("bias_type: {}\n".format(bias.type_name))
-
-    def SaveInputs(self):
-        total = len(self.operator.inputs)
-        self.f.write("input_counts: {}\n".format(total))
-        for idx in range(total):
-            tensor = self.operator.inputs[idx]
-            input_shape_str = TensorPrinter(self.verbose, tensor).GetShapeString()
-            self.f.write("input{}: {}\n".format(idx, input_shape_str))
-            self.f.write("input{}_type: {}\n".format(idx, tensor.type_name))
-
-    def SaveOutputs(self):
-        total = len(self.operator.outputs)
-        self.f.write("output_counts: {}\n".format(total))
-        for idx in range(total):
-            tensor = self.operator.outputs[idx]
-            output_shape_str = TensorPrinter(self.verbose, tensor).GetShapeString()
-            self.f.write("output{}: {}\n".format(idx, output_shape_str))
-            self.f.write("output{}_type: {}\n".format(idx, tensor.type_name))
-
-    def SaveFilter(self):
-        self.f.write("filter_w: {}\n".format(self.operator.options.FilterWidth()))
-        self.f.write("filter_h: {}\n".format(self.operator.options.FilterHeight()))
-
-    def SaveStride(self):
-        self.f.write("stride_w: {}\n".format(self.operator.options.StrideW()))
-        self.f.write("stride_h: {}\n".format(self.operator.options.StrideH()))
-
-    def SaveDilation(self):
-        self.f.write("dilation_w: {}\n".format(self.operator.options.DilationWFactor()))
-        self.f.write("dilation_h: {}\n".format(self.operator.options.DilationHFactor()))
-
-    def SavePadding(self):
-        if self.operator.options.Padding() == 0:
-            self.f.write("padding: SAME\n")
-        elif self.operator.options.Padding() == 1:
-            self.f.write("padding: VALID\n")
-
-    def SaveFusedAct(self):
-        if self.operator.fused_activation is not "NONE":
-            self.f.write("fused_act: {}\n".format(self.operator.fused_activation))
-
-    def SaveAttributes(self):
-        # operator option
-        # Some operations does not have option. In such case no option is printed
-        option_str = OptionPrinter(self.verbose, self.op_name,
-                                   self.operator.options).GetOptionString()
-        if self.op_name == 'AVERAGE_POOL_2D' or self.op_name == 'MAX_POOL_2D':
-            self.SaveFilter()
-            self.SaveStride()
-            self.SavePadding()
-        elif self.op_name == 'CONV_2D':
-            self.SaveStride()
-            self.SaveDilation()
-            self.SavePadding()
-        elif self.op_name == 'TRANSPOSE_CONV':
-            self.SaveStride()
-            self.SavePadding()
-        elif self.op_name == 'DEPTHWISE_CONV_2D':
-            self.SaveStride()
-            self.SaveDilation()
-            self.SavePadding()
-            self.f.write("depthmultiplier: {}\n".format(
-                self.operator.options.DepthMultiplier()))
-
-        self.SaveFusedAct()
diff --git a/tools/tflitefile_tool/graph_stats.py b/tools/tflitefile_tool/graph_stats.py
deleted file mode 100755 (executable)
index 85acaef..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class GraphStats():
-    def __init__(self):
-        from collections import Counter
-        from collections import defaultdict
-        self.op_counts = Counter()
-        self.filled_memory = 0
-        self.total_memory = 0
-
-    def accumulate_op_count(self, op_str, count):
-        self.op_counts[op_str] += count
-
-    def accumulate_filled_memory(self, size):
-        self.filled_memory += size
-
-    def accumulate_total_memory(self, size):
-        self.total_memory += size
-
-    def __iadd__(self, other):
-        self.op_counts += other.op_counts
-        self.filled_memory += other.filled_memory
-        self.total_memory += other.total_memory
-        return self
-
-
-def PrintGraphStats(stats, verbose):
-    print("Number of all operator types: {0}".format(len(stats.op_counts)))
-
-    # Print op type stats
-    for op_name in sorted(stats.op_counts.keys()):
-        occur = stats.op_counts[op_name]
-        optype_info_str = "\t{:38}: {:4}".format(op_name, occur)
-
-        print(optype_info_str)
-
-    summary_str = "{0:46}: {1:4}".format("Number of all operators",
-                                         sum(stats.op_counts.values()))
-    print(summary_str)
-    print('')
-
-    # Print memory stats
-    from tensor_printer import ConvertBytesToHuman
-    print("Expected TOTAL  memory: {0}".format(ConvertBytesToHuman(stats.total_memory)))
-    print("Expected FILLED memory: {0}".format(ConvertBytesToHuman(stats.filled_memory)))
-    print('')
-
-
-def CalcGraphStats(op_parser):
-    stats = GraphStats()
-
-    for type_str, oper_list in op_parser.operators_per_type.items():
-        # number of occurrence of this operator type
-        occur = len(oper_list)
-        stats.accumulate_op_count(type_str, occur)
-
-        # this operator type can be computed?
-        can_compute = oper_list[0].operation.can_compute
-
-    total_memory = 0
-    filled_memory = 0  # only memory for constant
-    for tensor in op_parser.GetAllTensors():
-        if tensor.tf_buffer.DataLength() != 0:
-            filled_memory += tensor.memory_size
-        total_memory += tensor.memory_size
-    stats.accumulate_filled_memory(filled_memory)
-    stats.accumulate_total_memory(total_memory)
-
-    return stats
diff --git a/tools/tflitefile_tool/ir/README.md b/tools/tflitefile_tool/ir/README.md
new file mode 100644 (file)
index 0000000..2625dfb
--- /dev/null
@@ -0,0 +1,5 @@
+# IR
+
+A model has a subgraph or subgraphs. A subgraph has operators and tensors.
+
+Parser will use these IRs as data.
diff --git a/tools/tflitefile_tool/ir/__init__.py b/tools/tflitefile_tool/ir/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/tflitefile_tool/ir/graph_stats.py b/tools/tflitefile_tool/ir/graph_stats.py
new file mode 100755 (executable)
index 0000000..5aebdbe
--- /dev/null
@@ -0,0 +1,59 @@
+#!/usr/bin/python
+
+# Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class GraphStats():
+    def __init__(self):
+        from collections import Counter
+        from collections import defaultdict
+        self.op_counts = Counter()
+        self.filled_memory = 0
+        self.total_memory = 0
+
+    def accumulate_op_count(self, op_str, count):
+        self.op_counts[op_str] += count
+
+    def accumulate_filled_memory(self, size):
+        self.filled_memory += size
+
+    def accumulate_total_memory(self, size):
+        self.total_memory += size
+
+    def __iadd__(self, other):
+        self.op_counts += other.op_counts
+        self.filled_memory += other.filled_memory
+        self.total_memory += other.total_memory
+        return self
+
+
+def CalcGraphStats(subg):
+    stats = GraphStats()
+
+    for type_str, oper_list in subg.optypes_map.items():
+        # number of occurrence of this operator type
+        occur = len(oper_list)
+        stats.accumulate_op_count(type_str, occur)
+
+    total_memory = 0
+    filled_memory = 0  # only memory for constant
+    for index, tensor in subg.tensors_map.items():
+        if tensor.buffer is not None:
+            filled_memory += tensor.memory_size
+        total_memory += tensor.memory_size
+    stats.accumulate_filled_memory(filled_memory)
+    stats.accumulate_total_memory(total_memory)
+
+    return stats
diff --git a/tools/tflitefile_tool/ir/operator.py b/tools/tflitefile_tool/ir/operator.py
new file mode 100644 (file)
index 0000000..0601e61
--- /dev/null
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NOTE
+- This class expresses a wrapping class for a native class.
+- Just use this class as an interface.
+"""
+
+
+class Operator(object):
+    def __init__(self):
+        self._index = -1
+        self._inputs = []
+        self._outputs = []
+        self._op_name = ""
+        self._actviation = ""
+        self._options = ""
+
+    '''index'''
+
+    @property
+    def index(self):
+        '''operator's int type index'''
+        return self._index
+
+    @index.setter
+    def index(self, value):
+        if not isinstance(value, int):
+            raise TypeError("must be set to an integer")
+        self._index = value
+
+    '''inputs'''
+
+    @property
+    def inputs(self):
+        '''Operators's input tensors as a list which consists of Tensors'''
+        return self._inputs
+
+    @inputs.setter
+    def inputs(self, value):
+        if not isinstance(value, list):
+            raise TypeError("must be set to a list")
+        self._inputs = value
+
+    '''outputs'''
+
+    @property
+    def outputs(self):
+        '''Operators's output tensors as a list which consists of Tensors'''
+        return self._outputs
+
+    @outputs.setter
+    def outputs(self, value):
+        if not isinstance(value, list):
+            raise TypeError("must be set to a list")
+        self._outputs = value
+
+    '''op_name'''
+
+    @property
+    def op_name(self):
+        '''Operator's name str'''
+        return self._op_name
+
+    @op_name.setter
+    def op_name(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._op_name = value
+
+    '''actviation'''
+
+    @property
+    def actviation(self):
+        '''Operator's activation str'''
+        return self._actviation
+
+    @actviation.setter
+    def actviation(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._actviation = value
+
+    '''options'''
+
+    @property
+    def options(self):
+        '''Operator's options str'''
+        return self._options
+
+    @options.setter
+    def options(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._options = value
diff --git a/tools/tflitefile_tool/ir/subgraph.py b/tools/tflitefile_tool/ir/subgraph.py
new file mode 100644 (file)
index 0000000..e687134
--- /dev/null
@@ -0,0 +1,170 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from collections.abc import MutableMapping
+'''optype -> Operator Index List'''
+
+
+class OpTypesMap(MutableMapping):
+    def __init__(self, *args, **kwargs):
+        self.store = dict()
+        self.update(dict(*args, **kwargs))
+
+    def __getitem__(self, key):
+        return self.store[self._keytransform(key)]
+
+    def __setitem__(self, key, value):
+        k = self._keytransform(key)
+        if not k in self.store.keys():
+            self.store[k] = []
+        self.store[k].append(value)
+
+    def __delitem__(self, key):
+        del self.store[self._keytransform(key)]
+
+    def __iter__(self):
+        return iter(self.store)
+
+    def __len__(self):
+        return len(self.store)
+
+    def _keytransform(self, key):
+        if not isinstance(key, str):
+            raise TypeError("must be set to a str")
+        return key
+
+
+"""
+NOTE
+- This class expresses a wrapping class for a native class.
+- Just use this class as an interface.
+"""
+
+
+class Subgraph(object):
+    def __init__(self):
+        self._index = -1
+        self._inputs = []
+        self._outputs = []
+        self._subg_name = ""
+        self._model_name = ""
+        self._tensors_map = {}
+        self._operators_map = {}
+        self._optypes_map = OpTypesMap()
+
+    '''index'''
+
+    @property
+    def index(self):
+        '''Subgraph's int type index'''
+        return self._index
+
+    @index.setter
+    def index(self, value):
+        if not isinstance(value, int):
+            raise TypeError("must be set to an integer")
+        self._index = value
+
+    '''inputs'''
+
+    @property
+    def inputs(self):
+        '''Subgraph's input tensors as a list which consists of Tensors'''
+        return self._inputs
+
+    @inputs.setter
+    def inputs(self, value):
+        if not isinstance(value, list):
+            raise TypeError("must be set to a list")
+        self._inputs = value
+
+    '''outputs'''
+
+    @property
+    def outputs(self):
+        '''Subgraph's output tensors as a list which consists of Tensors'''
+        return self._outputs
+
+    @outputs.setter
+    def outputs(self, value):
+        if not isinstance(value, list):
+            raise TypeError("must be set to a list")
+        self._outputs = value
+
+    '''subg_name'''
+
+    @property
+    def subg_name(self):
+        '''Subgraph's name str'''
+        return self._subg_name
+
+    @subg_name.setter
+    def subg_name(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._subg_name = value
+
+    '''model_name'''
+
+    @property
+    def model_name(self):
+        '''Model name str'''
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._model_name = value
+
+    '''tensors_map'''
+
+    @property
+    def tensors_map(self):
+        '''Subgraph's all tensors(key:index, value:Tensor)'''
+        return self._tensors_map
+
+    @tensors_map.setter
+    def tensors_map(self, value):
+        if not isinstance(value, dict):
+            raise TypeError("must be set to a dict")
+        self._tensors_map = value
+
+    '''operators_map'''
+
+    @property
+    def operators_map(self):
+        '''Subgraph's operators(key:index, value:Operator)'''
+        return self._operators_map
+
+    @operators_map.setter
+    def operators_map(self, value):
+        if not isinstance(value, dict):
+            raise TypeError("must be set to a dict")
+        self._operators_map = value
+
+    '''optypes_map'''
+
+    @property
+    def optypes_map(self):
+        '''Subgraph's operators per type(key:optype, value:[op_indices])'''
+        return self._optypes_map
+
+    @optypes_map.setter
+    def optypes_map(self, value):
+        if not isinstance(value, OpTypesMap):
+            raise TypeError("must be set to a OpTypesMap")
+        self._optypes_map = value
diff --git a/tools/tflitefile_tool/ir/tensor.py b/tools/tflitefile_tool/ir/tensor.py
new file mode 100644 (file)
index 0000000..f0f35a7
--- /dev/null
@@ -0,0 +1,120 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+NOTE
+- This class expresses a wrapping class for a native class.
+- Just use this class as an interface.
+"""
+
+
+class Tensor(object):
+    def __init__(self):
+        self._index = -1
+        self._tensor_name = ""
+        self._buffer = None
+        self._buffer_index = -1
+        self._type_name = ""
+        self._shape = []
+        self._memory_size = -1
+
+    '''index'''
+
+    @property
+    def index(self):
+        '''Tensor's int type index'''
+        return self._index
+
+    @index.setter
+    def index(self, value):
+        if not isinstance(value, int):
+            raise TypeError("must be set to an integer")
+        self._index = value
+
+    '''tensor_name'''
+
+    @property
+    def tensor_name(self):
+        '''Tensor's name str'''
+        return self._tensor_name
+
+    @tensor_name.setter
+    def tensor_name(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._tensor_name = value
+
+    '''buffer'''
+
+    @property
+    def buffer(self):
+        '''Tensor's buffer as a numpy instance type'''
+        return self._buffer
+
+    @buffer.setter
+    def buffer(self, value):
+        self._buffer = value
+
+    '''buffer_index'''
+
+    @property
+    def buffer_index(self):
+        '''Tensor's int type buffer index'''
+        return self._buffer_index
+
+    @buffer_index.setter
+    def buffer_index(self, value):
+        if not isinstance(value, int):
+            raise TypeError("must be set to an integer")
+        self._buffer_index = value
+
+    '''type_name'''
+
+    @property
+    def type_name(self):
+        '''Tensor's type name str'''
+        return self._type_name
+
+    @type_name.setter
+    def type_name(self, value):
+        if not isinstance(value, str):
+            raise TypeError("must be set to a str")
+        self._type_name = value
+
+    '''shape'''
+
+    @property
+    def shape(self):
+        '''Tensor's shape as a list'''
+        return self._shape
+
+    @shape.setter
+    def shape(self, value):
+        if not isinstance(value, list):
+            raise TypeError("must be set to a list")
+        self._shape = value
+
+    '''memory_size'''
+
+    @property
+    def memory_size(self):
+        '''Tensor's memory size as int type'''
+        return self._memory_size
+
+    @memory_size.setter
+    def memory_size(self, value):
+        if not isinstance(value, int):
+            raise TypeError("must be set to an integer")
+        self._memory_size = value
index ed534c14c2a41ac2805bf8f5297098a7d7bee0ca..76c43acfc5053a83b11a64e05d0bb30c01921689 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+'''
+Why is this file named `model_parser.py`, which is the same as `parser/model_parser.py`?
+- Until now, users have used the path `tools/tflitefile_tool/model_parser.py`.
+- Let's change the name to a proper name like `main.py` after the task for revision is done.
+'''
 
-import os
-import sys
-import numpy
-import flatbuffers
-import tflite.Model
-import tflite.SubGraph
 import argparse
-import graph_stats
-from operator_parser import OperatorParser
-from subgraph_printer import SubgraphPrinter
-from model_saver import ModelSaver
+from parser.model_parser import ModelParser
+from printer.subgraph_printer import SubgraphPrinter
+from saver.model_saver import ModelSaver
 
 
-class TFLiteModelFileParser(object):
+class MainOption(object):
     def __init__(self, args):
-        # Read flatbuffer file descriptor using argument
-        self.tflite_file = args.input_file
+        self.model_file = args.input_file
 
-        # Set print level (0 ~ 1)
+        # Set print level (0 ~ 2)
         self.print_level = args.verbose
-        if (args.verbose > 1):
-            self.print_level = 1
+        if (args.verbose > 2):
+            self.print_level = 2
         if (args.verbose < 0):
             self.print_level = 0
 
@@ -66,54 +63,24 @@ class TFLiteModelFileParser(object):
         if self.save == True:
             self.save_prefix = args.prefix
 
-    def PrintModel(self, model_name, op_parser):
-        printer = SubgraphPrinter(self.print_level, op_parser, model_name)
 
-        if self.print_all_tensor == False:
-            printer.SetPrintSpecificTensors(self.print_tensor_index)
+def PrintSubgraph(option, subg):
+    printer = SubgraphPrinter(option.print_level, subg)
 
-        if self.print_all_operator == False:
-            printer.SetPrintSpecificOperators(self.print_operator_index)
+    if option.print_all_tensor == False:
+        printer.SetPrintSpecificTensors(option.print_tensor_index)
 
-        printer.PrintInfo()
+    if option.print_all_operator == False:
+        printer.SetPrintSpecificOperators(option.print_operator_index)
 
-    def SaveModel(self, model_name, op_parser):
-        saver = ModelSaver(model_name, op_parser)
+    printer.PrintInfo()
 
-        if self.save_config == True:
-            saver.SaveConfigInfo(self.save_prefix)
 
-    def main(self):
-        # Generate Model: top structure of tflite model file
-        buf = self.tflite_file.read()
-        buf = bytearray(buf)
-        tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+def SaveSubgraph(option, subg):
+    saver = ModelSaver(subg)
 
-        stats = graph_stats.GraphStats()
-        # Model file can have many models
-        for subgraph_index in range(tf_model.SubgraphsLength()):
-            tf_subgraph = tf_model.Subgraphs(subgraph_index)
-            model_name = "#{0} {1}".format(subgraph_index, tf_subgraph.Name())
-            # 0th subgraph is main subgraph
-            if (subgraph_index == 0):
-                model_name += " (MAIN)"
-
-            # Parse Operators
-            op_parser = OperatorParser(tf_model, tf_subgraph)
-            op_parser.Parse()
-
-            stats += graph_stats.CalcGraphStats(op_parser)
-
-            if self.save == False:
-                # print all of operators or requested objects
-                self.PrintModel(model_name, op_parser)
-            else:
-                # save all of operators in this model
-                self.SaveModel(model_name, op_parser)
-
-        print('==== Model Stats ({} Subgraphs) ===='.format(tf_model.SubgraphsLength()))
-        print('')
-        graph_stats.PrintGraphStats(stats, self.print_level)
+    if option.save_config == True:
+        saver.SaveConfigInfo(option.save_prefix)
 
 
 if __name__ == '__main__':
@@ -138,6 +105,14 @@ if __name__ == '__main__':
     arg_parser.add_argument(
         '-p', '--prefix', help="file prefix to be saved (with -c/--config option)")
     args = arg_parser.parse_args()
+    option = MainOption(args)
+
+    subg_list = ModelParser(option.model_file).Parse()
 
-    # Call main function
-    TFLiteModelFileParser(args).main()
+    for subg in subg_list:
+        if option.save == False:
+            # print all of operators or requested objects
+            PrintSubgraph(option, subg)
+        else:
+            # save all of operators in this model
+            SaveSubgraph(option, subg)
diff --git a/tools/tflitefile_tool/model_saver.py b/tools/tflitefile_tool/model_saver.py
deleted file mode 100755 (executable)
index 15037a1..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from config_saver import ConfigSaver
-
-
-class ModelSaver(object):
-    def __init__(self, model_name, op_parser):
-        self.model_name = model_name
-        self.op_parser = op_parser
-
-    def SaveConfigInfo(self, prefix):
-        print("Save model configuration file")
-        for type_str, oper_list in self.op_parser.operators_per_type.items():
-            if prefix:
-                file_name = "{}_{}_{}.config".format(prefix, self.model_name, type_str)
-            else:
-                file_name = "{}_{}.config".format(self.model_name, type_str)
-            print("{} file is generated".format(file_name))
-            with open(file_name, 'wt') as f:
-                f.write("# {}, Total count: {}\n\n".format(type_str, len(oper_list)))
-            for operator in oper_list:
-                ConfigSaver(file_name, operator).SaveInfo()
diff --git a/tools/tflitefile_tool/operation.py b/tools/tflitefile_tool/operation.py
deleted file mode 100755 (executable)
index 6aa7527..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Conv2DOptions
-import tflite.Pool2DOptions
-import tflite.BuiltinOptions
-import tflite.Tensor
-from tensor_wrapping import Tensor
-import math
-
-
-# NOTICE
-# - an internal class. do not import outside this file.
-# - REF: https://stackoverflow.com/questions/551038/private-implementation-class-in-python
-class _OperationComputeMethod(object):
-    '''
-    NOTE: How to count operations of convolution(and also pooling)?
-
-    If we know operations of output's one element, we can calculate total output's operations.
-    For example, consider output Shape[3,3]
-    [ e11 e12 e13 ]
-    [ e21 e22 e23 ]
-    [ e31 e32 e33 ]
-    If we know operations for calculation of e11, we can know total operations of output(e11, e12, ... e33)
-    by operations of e11 * 9(total number of elements)
-
-    So we only need to know how to calculate operations of e11.
-    For this, just think how to conv operation to the output's element
-    If input_channel is 1, we can only think of kernel_size(kernel_w and kernel_h).
-    For example, consider input Shape[3,3] and kernel Shape[2,2]
-    [ i11 i12 i13 ]   [ k11 k12 ]   [ o11 o12 o13 ]
-    [ i21 i22 i23 ] * [ k21 k22 ] = [ o21 o22 o23 ]
-    [ i31 i32 i33 ]                 [ o31 o32 o33 ]
-
-    Conv operation: for o11, i11 * k11 + i21 * k21 + i12 * k12 + i22 * k22 = o11
-    On above conv operation, mul operations are done at 4 times(== kernel_w * kernel_h)
-    and add operations are dont at 3 times(== kernel_w * kernel_h - 1)
-    and also, bias will be done and it will be counted on add operations.
-
-    Anyway, we can calculate total operations on this way. This can apply to the way of pooling.
-    '''
-
-    def ComputeOperationForConv2D(self, tf_operator, inputs, outputs):
-        assert (
-            tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
-            .Conv2DOptions)
-
-        # NOTE: Assume that conv2d operator always take 3 tensors as inputs
-        #       and both width and height are the same.
-        # operator_inputs[]: [input_tensor, weight_tensor, bias_tensor]
-        # operator_outputs[]: [output_tensor]
-        # tflite's tensor shape: [N,H,W,C]
-        input_tensor = inputs[0].tf_tensor
-        weight_tensor = inputs[1].tf_tensor
-        output_tensor = outputs[0].tf_tensor
-
-        # kernel_ops = (kernel_w * kernel_h * input_channel * 2(multiply and add))
-        kernel_ops = (
-            weight_tensor.Shape(2) * weight_tensor.Shape(1) * input_tensor.Shape(3))
-
-        # total ops
-        #     = batch_size * output_channel * output_width * output_height * kernel_ops
-        total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) *
-                     output_tensor.Shape(2) * output_tensor.Shape(1))
-
-        add_instr_num = (total_ops * (kernel_ops + 1))  # bias
-        mul_instr_num = (total_ops * (kernel_ops))
-        nonlinear_instr_num = 0
-        return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
-    # NOTE: Reference the comment 'NOTE' of ComputeOperationForConv2D
-
-    def ComputeOperationForPooling(self, tf_operator, inputs, outputs):
-        assert (
-            tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
-            .Pool2DOptions)
-
-        dummy_input_tensor = inputs[0].tf_tensor
-        output_tensor = outputs[0].tf_tensor
-
-        pool2d_options = tflite.Pool2DOptions.Pool2DOptions()
-        pool2d_options.Init(tf_operator.BuiltinOptions().Bytes,
-                            tf_operator.BuiltinOptions().Pos)
-
-        # kernel_ops = kernel_w * kernel_h
-        kernel_ops = (pool2d_options.FilterWidth() * pool2d_options.FilterHeight())
-
-        # total ops
-        #     = batch_size * output_channel * output_width * output_height *
-        #       kernel_ops(kernel_w * kernel_h)
-        total_ops = (output_tensor.Shape(0) * output_tensor.Shape(3) *
-                     output_tensor.Shape(2) * output_tensor.Shape(1))
-
-        add_instr_num = (total_ops * kernel_ops - 1)
-        mul_instr_num = (total_ops * kernel_ops)
-        nonlinear_instr_num = 0
-        return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
-    def ComputeOperationForSoftmax(self, tf_operator, inputs, outputs):
-        assert (
-            tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
-            .SoftmaxOptions)
-
-        input_tensor = inputs[0].tf_tensor
-
-        dummy_batch_size = input_tensor.Shape(0)
-        input_dim = input_tensor.Shape(1)
-
-        # Softmax(x_i) = exp(x_i) / sum of exp(x)
-        add_instr_num = input_dim - 1  # sum of exp(x)
-        mul_instr_num = input_dim  # /
-        nonlinear_instr_num = input_dim + input_dim  # sum of exp(x) and exp(x_i)
-        return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
-    def ComputeOperationForFullyConnected(self, tf_operator, inputs, outputs):
-        assert (
-            tf_operator.BuiltinOptionsType() == tflite.BuiltinOptions.BuiltinOptions()
-            .FullyConnectedOptions)
-
-        # NOTE: Assume that fully_connected operator always take 3 tensors as inputs
-        #       and its X tensor's shape is [1, 1, 1, input_dim] with
-        #       its output Y [1, output_dim]
-        input_tensor = inputs[0].tf_tensor
-        output_tensor = outputs[0].tf_tensor
-
-        # ops_per_element
-        #     = input_dim(multiplication) + input_dim-1(addition) + 1(bias)
-        # total_ops
-        #     = ops_per_elem * output_dim
-
-        add_instr_num = (input_tensor.Shape(3) * output_tensor.Shape(1))
-        mul_instr_num = (input_tensor.Shape(3) * output_tensor.Shape(1))
-        nonlinear_instr_num = 0
-        return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
-    def ComputeOperationForNothing(self, tf_operator, inputs, outputs):
-        add_instr_num = 0
-        mul_instr_num = 0
-        nonlinear_instr_num = 0
-        return (add_instr_num, mul_instr_num, nonlinear_instr_num)
-
-    def NYI_ComputeOperation(self, tf_operator, inputs, outputs):
-        pass
-
-    operation_to_method_map = {
-        # Inceptionv3
-        "CONV_2D": ComputeOperationForConv2D,
-        "AVERAGE_POOL_2D": ComputeOperationForPooling,
-        "MAX_POOL_2D": ComputeOperationForPooling,
-        "SOFTMAX": ComputeOperationForSoftmax,
-        "FULLY_CONNECTED": ComputeOperationForFullyConnected,
-        "CONCATENATION": ComputeOperationForNothing,
-        # Extension
-        "TOPK_V2": NYI_ComputeOperation,
-        "SUB": NYI_ComputeOperation,
-        "STRIDED_SLICE": NYI_ComputeOperation,
-        "RESHAPE": NYI_ComputeOperation,
-        "GATHER": NYI_ComputeOperation,
-        "RESIZE_BILINEAR": NYI_ComputeOperation,
-        "CAST": NYI_ComputeOperation,
-        "ADD": NYI_ComputeOperation,
-        "MUL": NYI_ComputeOperation,
-        "DIV": NYI_ComputeOperation,
-        "CUSTOM(TensorFlowMax)": NYI_ComputeOperation,
-        "CUSTOM": NYI_ComputeOperation,
-    }
-
-
-class Operation(object):
-    def __init__(self, tf_operator, operator_str, inputs, outputs):
-        self.tf_operator = tf_operator
-        self.operator_str = operator_str
-        self.inputs = inputs
-        self.outputs = outputs
-        self.add_instr_num = 0
-        self.mul_instr_num = 0
-        self.nonlinear_instr_num = 0
-        self.can_compute = True
-        self.Compute()
-
-    def Compute(self):
-        comp_map = _OperationComputeMethod().operation_to_method_map
-        if not self.operator_str in comp_map.keys():
-            self.can_compute = False
-            return
-
-        method = comp_map[self.operator_str]
-        if method.__name__ == _OperationComputeMethod().NYI_ComputeOperation.__name__:
-            self.can_compute = False
-            return
-
-        self.add_instr_num, self.mul_instr_num, self.nonlinear_instr_num = method(
-            _OperationComputeMethod(), self.tf_operator, self.inputs, self.outputs)
-
-    def TotalInstrNum(self):
-        return (self.add_instr_num + self.mul_instr_num + self.nonlinear_instr_num)
diff --git a/tools/tflitefile_tool/operator_parser.py b/tools/tflitefile_tool/operator_parser.py
deleted file mode 100755 (executable)
index 2c230c2..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Model
-import tflite.SubGraph
-import tflite.Operator
-import tflite.OperatorCode
-import tflite.BuiltinOperator
-from operator_wrapping import Operator, EnumStrMaps
-from tensor_wrapping import Tensor, SetTensorTypeStr
-from operation import Operation
-
-
-class OperatorParser(object):
-    def __init__(self, tf_model, tf_subgraph):
-        self.tf_model = tf_model
-        self.tf_subgraph = tf_subgraph
-        self.operators_in_list = list()
-        self.operators_per_type = dict()
-        # Tensor type string table
-        SetTensorTypeStr()
-
-    def Parse(self):
-        for operator_idx in range(self.tf_subgraph.OperatorsLength()):
-            tf_operator = self.tf_subgraph.Operators(operator_idx)
-            opcode_str = self.GetOpcodeStr(tf_operator)
-            input_tensors = self.GetInputTensors(tf_operator)
-            output_tensors = self.GetOutputTensors(tf_operator)
-
-            op = Operator(operator_idx, tf_operator, input_tensors, output_tensors,
-                          opcode_str)
-            self.AppendOperator(op)
-
-    def GetOpcodeStr(self, tf_operator):
-        opcode_list_idx = tf_operator.OpcodeIndex()
-        opcode_id = self.tf_model.OperatorCodes(opcode_list_idx).BuiltinCode()
-        opcode_str = EnumStrMaps.BuiltinOpcode[opcode_id]
-        if opcode_id == 32:
-            # Custom operator
-            custom_operator = self.tf_model.OperatorCodes(tf_operator.OpcodeIndex())
-            custom_op_name = custom_operator.CustomCode().decode('utf-8')
-            opcode_str = opcode_str + "(" + custom_op_name + ")"
-        return opcode_str
-
-    def GetInputTensors(self, tf_operator):
-        operator_inputs = tf_operator.InputsAsNumpy()
-        return self.GetTensors(operator_inputs)
-
-    def GetOutputTensors(self, tf_operator):
-        operator_outputs = tf_operator.OutputsAsNumpy()
-        return self.GetTensors(operator_outputs)
-
-    def GetTensors(self, tf_tensors_index):
-        return_list = list()
-        for tensor_idx in tf_tensors_index:
-            # in case of optional input, tensor_idx == -1
-            if (tensor_idx < 0):
-                return_list.append(Tensor(tensor_idx, None, None))
-                continue
-            tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
-            buffer_idx = tf_tensor.Buffer()
-            tf_buffer = self.tf_model.Buffers(buffer_idx)
-            return_list.append(Tensor(tensor_idx, tf_tensor, tf_buffer))
-        return return_list
-
-    def GetAllTensors(self):
-        return_list = list()
-        for tensor_idx in range(self.tf_subgraph.TensorsLength()):
-            if (tensor_idx < 0):
-                return_list.append(Tensor(tensor_idx, 0, 0))
-                continue
-            tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
-            buffer_idx = tf_tensor.Buffer()
-            tf_buffer = self.tf_model.Buffers(buffer_idx)
-            return_list.append(Tensor(tensor_idx, tf_tensor, tf_buffer))
-        return return_list
-
-    def AppendOperator(self, operator):
-        self.operators_in_list.append(operator)
-
-        opcode_str = operator.opcode_str
-        if opcode_str not in self.operators_per_type:
-            self.operators_per_type[opcode_str] = list()
-        self.operators_per_type[opcode_str].append(operator)
diff --git a/tools/tflitefile_tool/operator_printer.py b/tools/tflitefile_tool/operator_printer.py
deleted file mode 100755 (executable)
index e7c5533..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_wrapping import Operator
-from tensor_printer import TensorPrinter
-from option_printer import OptionPrinter
-
-
-def GetStrTensorIndex(tensors):
-    return_string = "["
-    for idx in range(len(tensors)):
-        if idx != 0:
-            return_string += ", "
-        return_string += str(tensors[idx].tensor_idx)
-    return_string += "]"
-    return return_string
-
-
-class OperatorPrinter(object):
-    def __init__(self, verbose, operator):
-        self.verbose = verbose
-        self.operator = operator
-
-    def PrintInfo(self):
-        if (self.verbose < 1):
-            return
-
-        op_str = "Operator {0}: {1}".format(self.operator.operator_idx,
-                                            self.operator.opcode_str)
-
-        print(op_str)
-        print("\tFused Activation: " + self.operator.fused_activation)
-        self.PrintTensors()
-
-    def PrintTensors(self):
-        print("\tInput Tensors" + GetStrTensorIndex(self.operator.inputs))
-        for tensor in self.operator.inputs:
-            TensorPrinter(self.verbose, tensor).PrintInfo("\t\t")
-        print("\tOutput Tensors" + GetStrTensorIndex(self.operator.outputs))
-        for tensor in self.operator.outputs:
-            TensorPrinter(self.verbose, tensor).PrintInfo("\t\t")
-
-        # operator option
-        # Some operations does not have option. In such case no option is printed
-        OptionPrinter(self.verbose, self.operator.opcode_str,
-                      self.operator.options).PrintInfo("\t")
diff --git a/tools/tflitefile_tool/operator_wrapping.py b/tools/tflitefile_tool/operator_wrapping.py
deleted file mode 100755 (executable)
index 64bad1f..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Operator
-import tflite.OperatorCode
-import tflite.BuiltinOperator
-import tflite.ActivationFunctionType
-from operation import Operation
-
-
-# Match enum value integer to name string
-# Assumption 1: enum value is defined by old style (can be used on python 2)
-# Assumption 2: when class define enum value, only constant value is defined and methods are not defined
-# Assumption 3: only integer value is set by constant definition
-def BuildEnumClassStrMap(obj):
-    ret = {}
-    for fieldName in dir(obj):
-        if (not fieldName.startswith('_')):
-            fieldValue = getattr(obj, fieldName)
-            if (isinstance(fieldValue, (int))):
-                ret[fieldValue] = fieldName
-    return ret
-
-
-class EnumStrMaps():
-    BuiltinOpcode = BuildEnumClassStrMap(tflite.BuiltinOperator.BuiltinOperator())
-    ActivationFunctionType = BuildEnumClassStrMap(
-        tflite.ActivationFunctionType.ActivationFunctionType())
-    BuiltinOptions = BuildEnumClassStrMap(tflite.BuiltinOptions.BuiltinOptions())
-
-
-def GetAttribute(o, *args):
-    import functools
-    return functools.reduce(getattr, args, o)
-
-
-def BuildBuiltinOptionGen():
-    bo_gen = {}
-    for val_enum in EnumStrMaps.BuiltinOptions:
-        val_str = EnumStrMaps.BuiltinOptions[val_enum]
-        try:
-            # Dynamically import Builtin Option classes
-            # 0 (NONE) is the only exception that does not have no corresponding flatbuffer-generated class
-            module = __import__("tflite." + val_str)
-            bo_gen[val_enum] = GetAttribute(module, val_str, val_str)
-        except ImportError as e:
-            assert val_enum == 0 and val_str == "NONE"
-    return bo_gen
-
-
-class OptionLoader:
-    builtinOptionGen = BuildBuiltinOptionGen()
-
-    @staticmethod
-    def GetBuiltinOptions(options_type, options_table):
-        if (options_table == None) and (options_type != 0):
-            print(
-                "Bad flatbuffer file: undefined builtin option table with defined option type"
-            )
-            exit(1)
-        options = OptionLoader.builtinOptionGen[options_type]()
-        options.Init(options_table.Bytes, options_table.Pos)
-        return options
-
-
-class Operator(object):
-    def __init__(self, operator_idx, tf_operator, input_tensors, output_tensors,
-                 opcode_str):
-        self.operator_idx = operator_idx
-        self.tf_operator = tf_operator
-        self.inputs = input_tensors
-        self.outputs = output_tensors
-        self.opcode_str = opcode_str
-        self.operation = Operation(self.tf_operator, self.opcode_str, self.inputs,
-                                   self.outputs)
-        self.fused_activation = "NONE"
-        self.SetupBuiltinOption()
-        self.SetupFusedActivation()
-
-    def SetupBuiltinOption(self):
-        try:
-            self.options = OptionLoader.GetBuiltinOptions(
-                self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
-        except KeyError:
-            self.options = 0
-            return
-
-    def SetupFusedActivation(self):
-        # FIXME: workaround for ops such as custom
-        try:
-            options = OptionLoader.GetBuiltinOptions(
-                self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
-        except KeyError:
-            return
-
-        # fused activation function
-        try:
-            activation_code = options.FusedActivationFunction()
-            self.fused_activation = EnumStrMaps.ActivationFunctionType[activation_code]
-        except AttributeError:
-            # This operator does not support FusedActivationFunction
-            pass
diff --git a/tools/tflitefile_tool/option_printer.py b/tools/tflitefile_tool/option_printer.py
deleted file mode 100755 (executable)
index 15265ad..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-class OptionPrinter(object):
-    def __init__(self, verbose, op_name, options):
-        self.verbose = verbose
-        self.op_name = op_name
-        self.options = options
-
-    def GetPadding(self):
-        if self.options.Padding() == 0:
-            return "SAME"
-        elif self.options.Padding() == 1:
-            return "VALID"
-        else:
-            return "** wrong padding value **"
-
-    def PrintInfo(self, tab=""):
-        if (self.verbose < 1):
-            pass
-        if (self.options == 0):
-            return
-
-        option_str = self.GetOptionString()
-        if option_str:
-            print("{}Options".format(tab))
-            print("{}\t{}".format(tab, option_str))
-
-    def GetOptionString(self):
-        if (self.op_name == "AVERAGE_POOL_2D" or self.op_name == "MAX_POOL_2D"):
-            return "{}, {}, {}".format(
-                "Filter W:H = {}:{}".format(self.options.FilterWidth(),
-                                            self.options.FilterHeight()),
-                "Stride W:H = {}:{}".format(self.options.StrideW(),
-                                            self.options.StrideH()),
-                "Padding = {}".format(self.GetPadding()))
-        elif (self.op_name == "CONV_2D"):
-            return "{}, {}, {}".format(
-                "Stride W:H = {}:{}".format(self.options.StrideW(),
-                                            self.options.StrideH()),
-                "Dilation W:H = {}:{}".format(self.options.DilationWFactor(),
-                                              self.options.DilationHFactor()),
-                "Padding = {}".format(self.GetPadding()))
-        elif (self.op_name == "DEPTHWISE_CONV_2D"):
-            # yapf: disable
-            return "{}, {}, {}, {}".format(
-                "Stride W:H = {}:{}".format(self.options.StrideW(),
-                                                 self.options.StrideH()),
-                "Dilation W:H = {}:{}".format(self.options.DilationWFactor(),
-                                              self.options.DilationHFactor()),
-                "Padding = {}".format(self.GetPadding()),
-                "DepthMultiplier = {}".format(self.options.DepthMultiplier()))
-            # yapf: enable
diff --git a/tools/tflitefile_tool/parser/__init__.py b/tools/tflitefile_tool/parser/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/tflitefile_tool/parser/model_parser.py b/tools/tflitefile_tool/parser/model_parser.py
new file mode 100755 (executable)
index 0000000..68cd31a
--- /dev/null
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from parser.tflite.tflite_parser import TFLiteParser
+
+
+class ModelParser(object):
+    def __init__(self, model_file):
+        self.parser = None
+        # model_file: _io.BufferedReader
+        if model_file.name.endswith("tflite"):
+            self.parser = TFLiteParser(model_file)
+        # TODO: Add more parser
+
+    def Parse(self):
+        if self.parser is None:
+            raise NotImplementedError
+        return self.parser.Parse()
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py b/tools/tflitefile_tool/parser/tflite/tflite_enum_str_maps.py
new file mode 100644 (file)
index 0000000..6a3a205
--- /dev/null
@@ -0,0 +1,40 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tflite.BuiltinOperator
+import tflite.ActivationFunctionType
+import tflite.BuiltinOptions
+
+
+# Match enum value integer to name string
+# Assumption 1: enum value is defined by old style (can be used on python 2)
+# Assumption 2: when a class defines enum values, only constant values are defined and methods are not defined
+# Assumption 3: only integer value is set by constant definition
+def BuildEnumClassStrMap(obj):
+    ret = {}
+    for fieldName in dir(obj):
+        if (not fieldName.startswith('_')):
+            fieldValue = getattr(obj, fieldName)
+            if (isinstance(fieldValue, (int))):
+                ret[fieldValue] = fieldName
+    return ret
+
+
+class EnumStrMaps():
+    BuiltinOpcode = BuildEnumClassStrMap(tflite.BuiltinOperator.BuiltinOperator())
+    ActivationFunctionType = BuildEnumClassStrMap(
+        tflite.ActivationFunctionType.ActivationFunctionType())
+    BuiltinOptions = BuildEnumClassStrMap(tflite.BuiltinOptions.BuiltinOptions())
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_operator.py b/tools/tflitefile_tool/parser/tflite/tflite_operator.py
new file mode 100755 (executable)
index 0000000..211007e
--- /dev/null
@@ -0,0 +1,63 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir.operator import Operator
+from .tflite_enum_str_maps import EnumStrMaps
+from .tflite_option import OptionLoader, GetStringOptions
+
+
+class TFLiteOperator(Operator):
+    def __init__(self, operator_idx, tf_operator, input_tensors, output_tensors,
+                 opcode_str):
+        super(TFLiteOperator, self).__init__()
+
+        self.index = operator_idx
+        self.inputs = input_tensors
+        self.outputs = output_tensors
+        self.op_name = opcode_str
+        self.activation = "NONE"
+        self.options = ""
+
+        self.tf_operator = tf_operator
+        self.tf_options = None
+        self.SetupBuiltinOption()
+        self.SetupFusedActivation()
+
+    def SetupBuiltinOption(self):
+        # FIXME: workaround for ops such as custom
+        try:
+            self.tf_options = OptionLoader.GetBuiltinOptions(
+                self.tf_operator.BuiltinOptionsType(), self.tf_operator.BuiltinOptions())
+            if self.tf_options == None:
+                return
+
+            option_str = GetStringOptions(self.op_name, self.tf_options)
+            if option_str is None:
+                return
+
+            self.options = option_str
+        except KeyError:
+            return
+
+    def SetupFusedActivation(self):
+        if self.tf_options == None:
+            return
+        try:
+            activation_code = self.tf_options.FusedActivationFunction()
+            self.activation = EnumStrMaps.ActivationFunctionType[activation_code]
+        except AttributeError:
+            # This operator does not support FusedActivationFunction
+            pass
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_option.py b/tools/tflitefile_tool/parser/tflite/tflite_option.py
new file mode 100644 (file)
index 0000000..b85fbae
--- /dev/null
@@ -0,0 +1,96 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .tflite_enum_str_maps import EnumStrMaps
+
+
+def GetAttribute(o, *args):
+    import functools
+    return functools.reduce(getattr, args, o)
+
+
+def BuildBuiltinOptionGen():
+    bo_gen = {}
+    for val_enum in EnumStrMaps.BuiltinOptions:
+        val_str = EnumStrMaps.BuiltinOptions[val_enum]
+        try:
+            # Dynamically import Builtin Option classes
+            # 0 (NONE) is the only exception that does not have a corresponding flatbuffer-generated class
+            module = __import__("tflite." + val_str)
+            bo_gen[val_enum] = GetAttribute(module, val_str, val_str)
+        except ImportError as e:
+            assert val_enum == 0 and val_str == "NONE"
+    return bo_gen
+
+
+class OptionLoader:
+    builtinOptionGen = BuildBuiltinOptionGen()
+
+    @staticmethod
+    def GetBuiltinOptions(options_type, options_table):
+        if (options_table == None) and (options_type != 0):
+            print(
+                "Bad flatbuffer file: undefined builtin option table with defined option type"
+            )
+            exit(1)
+        options = OptionLoader.builtinOptionGen[options_type]()
+        options.Init(options_table.Bytes, options_table.Pos)
+        return options
+
+
+def GetStringPadding(options):
+    if options.Padding() == 0:
+        return "SAME"
+    elif options.Padding() == 1:
+        return "VALID"
+    else:
+        return "** wrong padding value **"
+
+
+def GetStringOptions(op_name, options):
+    if (op_name == "AVERAGE_POOL_2D" or op_name == "MAX_POOL_2D"):
+        return "{}, {}, {}".format(
+            "Filter W:H = {}:{}".format(options.FilterWidth(), options.FilterHeight()),
+            "Stride W:H = {}:{}".format(options.StrideW(),
+                                        options.StrideH()), "Padding = {}".format(
+                                            GetStringPadding(options)))
+    elif (op_name == "CONV_2D"):
+        return "{}, {}, {}".format(
+            "Stride W:H = {}:{}".format(options.StrideW(), options.StrideH()),
+            "Dilation W:H = {}:{}".format(options.DilationWFactor(),
+                                          options.DilationHFactor()),
+            "Padding = {}".format(GetStringPadding(options)))
+    elif (op_name == "DEPTHWISE_CONV_2D"):
+        # yapf: disable
+        return "{}, {}, {}, {}".format(
+            "Stride W:H = {}:{}".format(options.StrideW(),
+                                                options.StrideH()),
+            "Dilation W:H = {}:{}".format(options.DilationWFactor(),
+                                            options.DilationHFactor()),
+            "Padding = {}".format(GetStringPadding(options)),
+            "DepthMultiplier = {}".format(options.DepthMultiplier()))
+        # yapf: enable
+    elif (op_name == "STRIDED_SLICE"):
+        # yapf: disable
+        return "{}, {}, {}, {}, {}".format(
+            "begin_mask({})".format(options.BeginMask()),
+            "end_mask({})".format(options.EndMask()),
+            "ellipsis_mask({})".format(options.EllipsisMask()),
+            "new_axis_mask({})".format(options.NewAxisMask()),
+            "shrink_axis_mask({})".format(options.ShrinkAxisMask()))
+        # yapf: enable
+    else:
+        return None
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_parser.py b/tools/tflitefile_tool/parser/tflite/tflite_parser.py
new file mode 100755 (executable)
index 0000000..6a8f2b8
--- /dev/null
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tflite.Model
+from .tflite_subgraph import TFLiteSubgraph
+from .tflite_operator import TFLiteOperator, EnumStrMaps
+from .tflite_tensor import TFLiteTensor, SetTensorTypeStr
+
+
+def HasOptionalTensor(tf_subgraph):
+    for operator_idx in range(tf_subgraph.OperatorsLength()):
+        tf_operator = tf_subgraph.Operators(operator_idx)
+        if -1 in tf_operator.InputsAsNumpy():
+            return True
+        output_tensors = tf_operator.OutputsAsNumpy()
+        if -1 in tf_operator.OutputsAsNumpy():
+            return True
+
+    return False
+
+
+class TFLiteSubgraphParser(object):
+    def __init__(self, tf_model, subgraph_index):
+        self.tf_model = tf_model
+        self.tf_subgraph = tf_model.Subgraphs(subgraph_index)
+        self.subg = TFLiteSubgraph(subgraph_index, self.tf_subgraph)
+
+        # Tensor type string table
+        SetTensorTypeStr()
+
+    def Parse(self):
+        if HasOptionalTensor(self.tf_subgraph):
+            # Prepare for optional input and output tensors, which are indicated by index -1
+            self.subg.tensors_map[-1] = TFLiteTensor(-1, None, None)
+
+        # tensors
+        for tensor_idx in range(self.tf_subgraph.TensorsLength()):
+            tf_tensor = self.tf_subgraph.Tensors(tensor_idx)
+            buffer_idx = tf_tensor.Buffer()
+            tf_buffer = self.tf_model.Buffers(buffer_idx)
+            t = TFLiteTensor(tensor_idx, tf_tensor, tf_buffer)
+            self.subg.tensors_map[tensor_idx] = t
+
+        # operators
+        for operator_idx in range(self.tf_subgraph.OperatorsLength()):
+            tf_operator = self.tf_subgraph.Operators(operator_idx)
+            op_name = self.GetOpcodeStr(tf_operator)
+            input_tensors = self.GetTensors(tf_operator.InputsAsNumpy())
+            output_tensors = self.GetTensors(tf_operator.OutputsAsNumpy())
+
+            op = TFLiteOperator(operator_idx, tf_operator, input_tensors, output_tensors,
+                                op_name)
+            self.subg.operators_map[op.index] = op
+            self.subg.optypes_map[op.op_name] = op
+
+        self.subg.inputs = self.GetTensors(self.tf_subgraph.InputsAsNumpy())
+        self.subg.outputs = self.GetTensors(self.tf_subgraph.OutputsAsNumpy())
+
+        return self.subg
+
+    def GetOpcodeStr(self, tf_operator):
+        opcode_list_idx = tf_operator.OpcodeIndex()
+        opcode_id = self.tf_model.OperatorCodes(opcode_list_idx).BuiltinCode()
+        opcode_str = EnumStrMaps.BuiltinOpcode[opcode_id]
+        if opcode_id == 32:
+            # Custom operator
+            custom_operator = self.tf_model.OperatorCodes(tf_operator.OpcodeIndex())
+            custom_op_name = custom_operator.CustomCode().decode('utf-8')
+            opcode_str = opcode_str + "(" + custom_op_name + ")"
+        return opcode_str
+
+    def GetTensors(self, tf_tensors_index):
+        assert len(self.subg.tensors_map.keys()) > 0
+
+        return_list = []
+        for tensor_idx in tf_tensors_index:
+            return_list.append(self.subg.tensors_map[tensor_idx])
+        return return_list
+
+
+class TFLiteParser(object):
+    def __init__(self, model_file):
+        self.model_file = model_file
+
+    def Parse(self):
+        # Generate Model: top structure of tflite model file
+        buf = self.model_file.read()
+        buf = bytearray(buf)
+        tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+
+        # A model file can contain many subgraphs
+        subg_list = []
+        for subgraph_index in range(tf_model.SubgraphsLength()):
+            # Parse Subgraphs
+            subg_parser = TFLiteSubgraphParser(tf_model, subgraph_index)
+            subg = subg_parser.Parse()
+            subg_list.append(subg)
+
+        return subg_list
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py b/tools/tflitefile_tool/parser/tflite/tflite_subgraph.py
new file mode 100755 (executable)
index 0000000..0c6338e
--- /dev/null
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir.subgraph import Subgraph
+
+
+class TFLiteSubgraph(Subgraph):
+    def __init__(self, subg_idx, tf_subgraph):
+        super(TFLiteSubgraph, self).__init__()
+        self.tf_subgraph = tf_subgraph
+
+        self.index = subg_idx
+        if tf_subgraph.Name() is not None:
+            self.subg_name = str(tf_subgraph.Name())
+        self.model_name = "#{0} {1}".format(subg_idx, self.subg_name)
+        if (subg_idx == 0):  # 0th subgraph is main subgraph
+            self.model_name += " (MAIN)"
diff --git a/tools/tflitefile_tool/parser/tflite/tflite_tensor.py b/tools/tflitefile_tool/parser/tflite/tflite_tensor.py
new file mode 100755 (executable)
index 0000000..afd6a27
--- /dev/null
@@ -0,0 +1,124 @@
+#!/usr/bin/python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import tflite.Tensor
+import tflite.TensorType
+from ir.tensor import Tensor
+
+TensorTypeList = {}
+
+
+def SetTensorTypeStr():
+    tensorTypeObj = tflite.TensorType.TensorType()
+
+    for fieldName in dir(tensorTypeObj):
+        if (not fieldName.startswith('_')):
+            fieldValue = getattr(tensorTypeObj, fieldName)
+            if (isinstance(fieldValue, (int))):
+                TensorTypeList[fieldValue] = fieldName
+
+
+TYPES_SIZE = {
+    'BOOL': 1,
+    'COMPLEX64': 8,
+    'FLOAT16': 2,
+    'FLOAT32': 4,
+    'INT16': 2,
+    'INT32': 4,
+    'INT64': 8,
+    'UINT8': 1,
+    'NONE': 0,
+}
+
+
+def GetTypeSize(type_name):
+    try:
+        return TYPES_SIZE[type_name]
+
+    except KeyError as error:
+        return 0
+
+
+TYPE_TO_NPTYPE = {
+    'BOOL': np.bool,
+    'COMPLEX64': np.cdouble,
+    'FLOAT16': np.float16,
+    'FLOAT32': np.float32,
+    'INT16': np.int16,
+    'INT32': np.int32,
+    'INT64': np.int64,
+    'UINT8': np.uint8,
+}
+
+
+def ConvertProperNPArrayType(np_arr, np_shape, type_name):
+    try:
+        return np_arr.view(TYPE_TO_NPTYPE[type_name]).reshape(np_shape)
+    except KeyError as error:
+        return np_arr.view().reshape(np_shape)
+
+
+class TFLiteTensor(Tensor):
+    def __init__(self, tensor_idx, tf_tensor, tf_buffer):
+        super(TFLiteTensor, self).__init__()
+        self.tf_tensor = tf_tensor
+        self.tf_buffer = tf_buffer
+
+        self.index = int(tensor_idx)
+        self.tensor = tf_tensor
+
+        # optional input
+        if self.index == -1:
+            self.type_name = "NONE"
+        # general input
+        else:
+            assert tf_tensor is not None
+            assert tf_buffer is not None
+            self.tensor_name = str(tf_tensor.Name())
+            self.type_name = TensorTypeList[tf_tensor.Type()]
+            self.buffer_index = tf_tensor.Buffer()
+            if (tf_buffer.DataLength() > 0):
+                self.buffer = ConvertProperNPArrayType(tf_buffer.DataAsNumpy(),
+                                                       tf_tensor.ShapeAsNumpy(),
+                                                       self.type_name)
+
+            # shape: Empty list([]) will mean Scalar
+            for shape_idx in range(tf_tensor.ShapeLength()):
+                # when shape signature is -1, that means unknown dim
+                if tf_tensor.ShapeSignature(shape_idx) != -1:
+                    self.shape.append(int(tf_tensor.Shape(shape_idx)))
+                else:
+                    self.shape.append(-1)
+
+        self.memory_size = self.GetMemorySize()
+
+    def GetMemorySize(self):
+        type_size = GetTypeSize(self.type_name)
+        if type_size == 0:
+            return 0
+
+        # memory size in bytes
+        size = int(type_size)
+        shape_length = len(self.shape)
+        if shape_length == 0:
+            return size
+
+        for shape_idx in range(shape_length):
+            shape_size = int(self.shape[shape_idx])
+            size *= shape_size
+
+        return size
diff --git a/tools/tflitefile_tool/printer/__init__.py b/tools/tflitefile_tool/printer/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/tflitefile_tool/printer/string_builder.py b/tools/tflitefile_tool/printer/string_builder.py
new file mode 100644 (file)
index 0000000..d765420
--- /dev/null
@@ -0,0 +1,175 @@
+#!/usr/bin/python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+
+UNIT_SYMBOLS = ['B', 'K', 'M', 'G', 'T']
+CHAR_SYMBOLS = {'operator': '#', 'tensor': '%', 'buffer': '&'}
+
+
+def ConvertBytesToHuman(n):
+    n = int(n)
+    if n < 0:
+        return 0
+
+    format_str = "%(val)3.1f%(symb)s"
+    prefix = {}
+    for i, s in enumerate(UNIT_SYMBOLS[1:]):
+        prefix[s] = 1 << (i + 1) * 10
+
+    for symbol in reversed(UNIT_SYMBOLS[1:]):
+        if n >= prefix[symbol]:
+            v = float(n) / prefix[symbol]
+            return format_str % dict(symb=symbol, val=v)
+
+    return format_str % dict(symb=UNIT_SYMBOLS[0], val=n)
+
+
+def GetStringTensorIndex(tensors):
+    return_string = []
+    return_string.append("[")
+    for idx in range(len(tensors)):
+        if idx != 0:
+            return_string.append(", ")
+        return_string.append(CHAR_SYMBOLS['tensor'] + str(tensors[idx].index))
+    return_string.append("]")
+    return "".join(return_string)
+
+
+def GetStringShape(tensor):
+    shape_len = len(tensor.shape)
+    if shape_len == 0:
+        return "Scalar"
+    return_string = []
+    return_string.append("[")
+    for shape_idx in range(shape_len):
+        if (shape_idx != 0):
+            return_string.append(", ")
+        return_string.append(str(tensor.shape[shape_idx]))
+    return_string.append("]")
+    return "".join(return_string)
+
+
+def GetStringTensor(tensor):
+    info = ""
+    if tensor.index < 0:
+        info = "{:5} : {}".format(CHAR_SYMBOLS['tensor'] + str(tensor.index),
+                                  "(OPTIONAL)")
+    else:
+        shape_str = GetStringShape(tensor)
+        type_name = tensor.type_name
+        shape_name = tensor.tensor_name
+        memory_size = ConvertBytesToHuman(tensor.memory_size)
+
+        buffer = ["("]
+        if tensor.buffer is not None:
+            buffer.append(
+                "{:5}: ".format(CHAR_SYMBOLS['buffer'] + str(tensor.buffer_index)))
+            # if too big, just skip it.
+            if tensor.buffer.size > 4:
+                buffer.append("".join(['[' for _ in range(tensor.buffer.ndim)]))
+                buffer.append(" ... ")
+                buffer.append("".join([']' for _ in range(tensor.buffer.ndim)]))
+            else:
+                buffer.append(
+                    np.array2string(
+                        tensor.buffer,
+                        precision=3,
+                        separator=', ',
+                        threshold=4,
+                        edgeitems=2))
+        else:
+            buffer.append("Empty")
+        buffer.append(")")
+        buffer_str = "".join(buffer)
+
+        info = "{:5} : buffer {:25} | {:7} | Memory {:6} | Shape {} ({})".format(
+            CHAR_SYMBOLS['tensor'] + str(tensor.index), buffer_str, type_name,
+            memory_size, shape_str, shape_name)
+    return info
+
+
+def GetStringBuffer(tensor):
+    buffer = []
+    buffer.append("Buffer {:5}".format(CHAR_SYMBOLS['buffer'] + str(tensor.buffer_index)))
+    buffer.append("\n")
+    buffer.append(np.array2string(tensor.buffer, separator=', '))
+    return "".join(buffer)
+
+
+class StringBuilder(object):
+    def __init__(self, spacious_str="  "):
+        self.spacious_str = spacious_str
+
+    def GraphStats(self, stats):
+        results = []
+
+        results.append("{:38}: {:4}".format("Number of all operator types",
+                                            len(stats.op_counts)))
+
+        # op type stats
+        for op_name in sorted(stats.op_counts.keys()):
+            occur = stats.op_counts[op_name]
+            optype_info_str = "{:38}: {:4}".format(self.spacious_str + op_name, occur)
+            results.append(optype_info_str)
+
+        summary_str = "{0:38}: {1:4}".format("Number of all operators",
+                                             sum(stats.op_counts.values()))
+        results.append(summary_str)
+        results.append('')
+
+        # memory stats
+        results.append("Expected TOTAL  memory: {}".format(
+            ConvertBytesToHuman(stats.total_memory)))
+        results.append("Expected FILLED memory: {}".format(
+            ConvertBytesToHuman(stats.filled_memory)))
+
+        return "\n".join(results)
+
+    def Operator(self, operator):
+        results = []
+        results.append("{} {}".format(CHAR_SYMBOLS['operator'] + str(operator.index),
+                                      operator.op_name))
+        results.append("{}Fused Activation: {}".format(self.spacious_str,
+                                                       operator.activation))
+        results.append("{}Input Tensors{}".format(self.spacious_str,
+                                                  GetStringTensorIndex(operator.inputs)))
+        for tensor in operator.inputs:
+            results.append(self.Tensor(tensor, self.spacious_str + self.spacious_str))
+        results.append("{}Output Tensors{}".format(self.spacious_str,
+                                                   GetStringTensorIndex(
+                                                       operator.outputs)))
+        for tensor in operator.outputs:
+            results.append(self.Tensor(tensor, self.spacious_str + self.spacious_str))
+        # operator option
+        # Some operators do not have options. In such cases, no option is printed
+        if operator.options != None and operator.options != "":
+            results.append(self.Option(operator.options, self.spacious_str))
+        return "\n".join(results)
+
+    def Tensor(self, tensor, depth_str=""):
+        results = []
+        results.append("{}{}".format(depth_str, GetStringTensor(tensor)))
+        return "".join(results)
+
+    def Option(self, options_str, depth_str=""):
+        results = []
+        results.append("{}Options".format(depth_str))
+        results.append("{}{}{}".format(depth_str, self.spacious_str, options_str))
+        return "\n".join(results)
+
+    def Buffer(self, tensor, depth_str=""):
+        return "{}{}".format(depth_str, GetStringBuffer(tensor))
diff --git a/tools/tflitefile_tool/printer/subgraph_printer.py b/tools/tflitefile_tool/printer/subgraph_printer.py
new file mode 100755 (executable)
index 0000000..51d8453
--- /dev/null
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+
+# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ir import graph_stats
+from .string_builder import StringBuilder
+
+
class SubgraphPrinter(object):
    """Print information about one subgraph.

    Depending on the verbosity level and on the optional tensor/operator
    index filters, prints the model header, tensors, operators, buffers and
    graph statistics using :class:`StringBuilder`.
    """

    def __init__(self, verbose, subg, spacious_str="  "):
        self.verbose = verbose
        self.subg = subg
        self.spacious_str = spacious_str
        # By default everything is printed; SetPrintSpecific*() narrows this.
        self.print_all_tensor = True
        self.print_tensor_index_list = None
        self.print_all_operator = True
        self.print_operator_index_list = None

    def SetPrintSpecificTensors(self, tensor_indices):
        """Restrict tensor printing to *tensor_indices* (no-op when empty)."""
        if tensor_indices:
            self.print_all_tensor = False
            self.print_tensor_index_list = tensor_indices

    def SetPrintSpecificOperators(self, operator_indices):
        """Restrict operator printing to *operator_indices* (no-op when empty)."""
        if operator_indices:
            self.print_all_operator = False
            self.print_operator_index_list = operator_indices

    def PrintInfo(self):
        """Print the subgraph according to the configured filters/verbosity."""
        if self.print_all_tensor and self.print_all_operator:
            print("[" + self.subg.model_name + "]")
            print('')
            if self.verbose > 0:
                self.PrintModelInfo()
                print('')
                self.PrintOperators()
            if self.verbose == 2:
                self.PrintBuffers()
            self.PrintGraphStats()

        # Specific-index printing below is meaningful only in verbose mode.
        if self.verbose == 0:
            return

        if not self.print_all_tensor:
            print('')
            self.PrintSpecificTensors(self.print_tensor_index_list)
            print('')

        if not self.print_all_operator:
            print('')
            self.PrintSpecificOperators(self.print_operator_index_list)
            print('')

    def PrintModelInfo(self):
        """Print the subgraph's input/output tensor indices and tensors."""
        model_inputs = [t.index for t in self.subg.inputs]
        model_outputs = [t.index for t in self.subg.outputs]
        print(self.subg.model_name + " input tensors: " + str(model_inputs))
        self.PrintSpecificTensors(model_inputs, "    ")
        print(self.subg.model_name + " output tensors: " + str(model_outputs))
        self.PrintSpecificTensors(model_outputs, "    ")

    def PrintOperators(self):
        """Print every operator in the subgraph, one block per operator."""
        for index, operator in self.subg.operators_map.items():
            info = StringBuilder(self.spacious_str).Operator(operator)
            print(info)
            print()

    def PrintSpecificTensors(self, print_tensor_index_list, depth_str=""):
        """Print only the tensors whose indices are listed."""
        for index in print_tensor_index_list:
            tensor = self.subg.tensors_map[index]
            info = StringBuilder(self.spacious_str).Tensor(tensor, depth_str)
            print(info)

    def PrintSpecificOperators(self, print_operator_index_list):
        """Print only the operators whose indices are listed."""
        for index in print_operator_index_list:
            operator = self.subg.operators_map[index]
            info = StringBuilder(self.spacious_str).Operator(operator)
            print(info)

    def PrintGraphStats(self):
        """Compute and print aggregate statistics of the subgraph."""
        stats = graph_stats.CalcGraphStats(self.subg)
        info = StringBuilder(self.spacious_str).GraphStats(stats)
        print(info)

    def PrintBuffers(self):
        """Print the buffer of every tensor that has one."""
        for index, tensor in self.subg.tensors_map.items():
            if tensor.buffer is not None:
                info = StringBuilder(self.spacious_str).Buffer(tensor)
                print(info)
                print()
diff --git a/tools/tflitefile_tool/saver/__init__.py b/tools/tflitefile_tool/saver/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/tflitefile_tool/saver/config_saver.py b/tools/tflitefile_tool/saver/config_saver.py
new file mode 100755 (executable)
index 0000000..fa35969
--- /dev/null
@@ -0,0 +1,122 @@
+#!/usr/bin/python
+
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from printer.string_builder import GetStringShape
+
+
+# TODO: Revise it as minimized `write` methods by using `StringBuilder`
class ConfigSaver(object):
    """Append one operator's configuration to an INI-style ``.config`` file.

    Each operator becomes a ``[index]`` section followed by input/output
    shapes and type names, and op-specific attributes (stride, padding, ...).
    """

    def __init__(self, file_name, operator):
        self.file_name = file_name
        self.operator = operator
        # Set self.verbose to 1 level to print more information
        self.verbose = 1
        self.op_idx = operator.index
        self.op_name = operator.op_name
        self.options = operator.tf_options

        # Opened in append mode: several operators share one file.
        self.f = open(file_name, 'at')

    def __del__(self):
        # NOTE(review): relies on GC for cleanup; if __init__'s open() raised,
        # self.f does not exist. A context manager would be safer — confirm
        # callers before restructuring.
        self.f.close()

    def SaveInfo(self):
        """Write the full section for this operator."""
        self.f.write("[{}]\n".format(self.op_idx))
        # CONV_2D inputs have fixed roles (input/weights/bias), so they get
        # dedicated keys instead of the generic inputN form.
        if self.op_name == 'CONV_2D':
            self.SaveConv2DInputs()
        else:
            self.SaveInputs()

        self.SaveOutputs()

        self.SaveAttributes()

        self.f.write('\n')

    def SaveConv2DInputs(self):
        """Write input/weights/bias shapes and types for a CONV_2D operator."""
        if len(self.operator.inputs) != 3:
            raise AssertionError('Conv2D input count should be 3')

        input = self.operator.inputs[0]
        weight = self.operator.inputs[1]
        bias = self.operator.inputs[2]

        self.f.write("input: {}\n".format(GetStringShape(input)))
        self.f.write("input_type: {}\n".format(input.type_name))
        self.f.write("weights: {}\n".format(GetStringShape(weight)))
        self.f.write("weights_type: {}\n".format(weight.type_name))
        self.f.write("bias: {}\n".format(GetStringShape(bias)))
        self.f.write("bias_type: {}\n".format(bias.type_name))

    def SaveInputs(self):
        """Write the generic inputN/inputN_type keys for all inputs."""
        total = len(self.operator.inputs)
        self.f.write("input_counts: {}\n".format(total))
        for idx, tensor in enumerate(self.operator.inputs):
            self.f.write("input{}: {}\n".format(idx, GetStringShape(tensor)))
            self.f.write("input{}_type: {}\n".format(idx, tensor.type_name))

    def SaveOutputs(self):
        """Write the generic outputN/outputN_type keys for all outputs."""
        total = len(self.operator.outputs)
        self.f.write("output_counts: {}\n".format(total))
        for idx, tensor in enumerate(self.operator.outputs):
            self.f.write("output{}: {}\n".format(idx, GetStringShape(tensor)))
            self.f.write("output{}_type: {}\n".format(idx, tensor.type_name))

    def SaveFilter(self):
        """Write pooling filter width/height."""
        self.f.write("filter_w: {}\n".format(self.options.FilterWidth()))
        self.f.write("filter_h: {}\n".format(self.options.FilterHeight()))

    def SaveStride(self):
        """Write stride width/height."""
        self.f.write("stride_w: {}\n".format(self.options.StrideW()))
        self.f.write("stride_h: {}\n".format(self.options.StrideH()))

    def SaveDilation(self):
        """Write dilation factors."""
        self.f.write("dilation_w: {}\n".format(self.options.DilationWFactor()))
        self.f.write("dilation_h: {}\n".format(self.options.DilationHFactor()))

    def SavePadding(self):
        """Write the padding scheme (0 = SAME, 1 = VALID per tflite enum)."""
        if self.options.Padding() == 0:
            self.f.write("padding: SAME\n")
        elif self.options.Padding() == 1:
            self.f.write("padding: VALID\n")

    def SaveFusedAct(self):
        # Was `is not "NONE"`: identity comparison with a string literal is
        # implementation-dependent (SyntaxWarning since Python 3.8). Use !=.
        if self.operator.activation != "NONE":
            self.f.write("fused_act: {}\n".format(self.operator.activation))

    def SaveAttributes(self):
        """Write op-specific attributes followed by the fused activation."""
        if self.op_name == 'AVERAGE_POOL_2D' or self.op_name == 'MAX_POOL_2D':
            self.SaveFilter()
            self.SaveStride()
            self.SavePadding()
        elif self.op_name == 'CONV_2D':
            self.SaveStride()
            self.SaveDilation()
            self.SavePadding()
        elif self.op_name == 'TRANSPOSE_CONV':
            self.SaveStride()
            self.SavePadding()
        elif self.op_name == 'DEPTHWISE_CONV_2D':
            self.SaveStride()
            self.SaveDilation()
            self.SavePadding()
            self.f.write("depthmultiplier: {}\n".format(self.options.DepthMultiplier()))

        self.SaveFusedAct()
diff --git a/tools/tflitefile_tool/saver/model_saver.py b/tools/tflitefile_tool/saver/model_saver.py
new file mode 100755 (executable)
index 0000000..117ec76
--- /dev/null
@@ -0,0 +1,36 @@
+#!/usr/bin/python
+
+# Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .config_saver import ConfigSaver
+
+
class ModelSaver(object):
    """Write per-operator-type ``.config`` files for one subgraph."""

    def __init__(self, subg):
        self.model_name = subg.model_name
        self.subg = subg.subg

    def SaveConfigInfo(self, prefix):
        """Create one config file per operator type found in the subgraph.

        Each file starts with a comment header holding the type name and
        operator count; ConfigSaver then appends one section per operator.
        """
        print("Save model configuration file")
        for type_str, oper_list in self.subg.optypes_map.items():
            file_name = ("{}_{}_{}.config".format(prefix, self.model_name, type_str)
                         if prefix else
                         "{}_{}.config".format(self.model_name, type_str))
            print("{} file is generated".format(file_name))
            # Truncate/create the file with its header; ConfigSaver reopens
            # it in append mode for every operator.
            with open(file_name, 'wt') as f:
                f.write("# {}, Total count: {}\n\n".format(type_str, len(oper_list)))
            for op in oper_list:
                ConfigSaver(file_name, op).SaveInfo()
index dccb3454f5d592f91712f237cac0873312355c39..fdef38c7934d4fb878370830313543a299340933 100755 (executable)
@@ -22,6 +22,19 @@ import tflite.Model
 import tflite.SubGraph
 import tflite.BuiltinOptions
 import argparse
+import pkg_resources
+
+
+# On flatbuffers 2.0, EndVector doesn't require length argument any more.
+# But flatbuffers under 2.0 (ex. 1.12) requires length argument.
+# We need this workaround until we abandon flatbuffers 1.12.
+# Reference: https://github.com/google/flatbuffers/issues/6858
# On flatbuffers 2.0, EndVector doesn't require length argument any more.
# But flatbuffers under 2.0 (ex. 1.12) requires length argument.
# We need this workaround until we abandon flatbuffers 1.12.
# Reference: https://github.com/google/flatbuffers/issues/6858
def EndVector(builder, length):
    """Finish the vector under construction on *builder*.

    Dispatches on the installed flatbuffers version: < 2.0 needs the element
    count, >= 2.0 forbids it.  (Parameter renamed from ``len``, which
    shadowed the builtin; all call sites pass it positionally.)
    """
    flat_version = pkg_resources.get_distribution('flatbuffers').version
    if pkg_resources.parse_version(flat_version) < pkg_resources.parse_version("2.0"):
        return builder.EndVector(length)
    else:
        return builder.EndVector()
 
 
 # Assume we use only main model in model file
@@ -135,7 +148,7 @@ def GenerateOperatorCodes(new_builder, sample_model, used_opcodes_dic,
     for operator_code_idx in reversed(range(new_operator_code_num)):
         new_builder.PrependUOffsetTRelative(new_operator_code_list[operator_code_idx])
 
-    return new_builder.EndVector(new_operator_code_num)
+    return EndVector(new_builder, new_operator_code_num)
 
 
 def GenerateQuantization(new_builder, selected_quantization):
@@ -146,7 +159,7 @@ def GenerateQuantization(new_builder, selected_quantization):
             new_builder, min_num)
         for min_idx in reversed(range(min_num)):
             new_builder.PrependFloat32(selected_quantization.Min(min_idx))
-        new_min = new_builder.EndVector(min_num)
+        new_min = EndVector(new_builder, min_num)
 
     # Create max vector
     max_num = selected_quantization.MaxLength()
@@ -155,7 +168,7 @@ def GenerateQuantization(new_builder, selected_quantization):
             new_builder, max_num)
         for max_idx in reversed(range(max_num)):
             new_builder.PrependFloat32(selected_quantization.Max(max_idx))
-        new_max = new_builder.EndVector(max_num)
+        new_max = EndVector(new_builder, max_num)
 
     # Create scale vector
     scale_num = selected_quantization.ScaleLength()
@@ -164,7 +177,7 @@ def GenerateQuantization(new_builder, selected_quantization):
             new_builder, scale_num)
         for scale_idx in reversed(range(scale_num)):
             new_builder.PrependFloat32(selected_quantization.Scale(scale_idx))
-        new_scale = new_builder.EndVector(scale_num)
+        new_scale = EndVector(new_builder, scale_num)
 
     # Create zero_point vector
     zeropoint_num = selected_quantization.ZeroPointLength()
@@ -173,7 +186,7 @@ def GenerateQuantization(new_builder, selected_quantization):
             new_builder, zeropoint_num)
         for zeropoint_idx in reversed(range(zeropoint_num)):
             new_builder.PrependInt64(selected_quantization.ZeroPoint(zeropoint_idx))
-        new_zeropoint = new_builder.EndVector(zeropoint_num)
+        new_zeropoint = EndVector(new_builder, zeropoint_num)
 
     # Create quantization
     tflite.QuantizationParameters.QuantizationParametersStart(new_builder)
@@ -204,7 +217,7 @@ def GenerateTensor(new_builder, selected_tensor, used_buffers_dic):
     if shape_num != 0:
         for shape_idx in reversed(range(shape_num)):
             new_builder.PrependInt32(selected_tensor.Shape(shape_idx))
-    new_shape = new_builder.EndVector(shape_num)
+    new_shape = EndVector(new_builder, shape_num)
 
     # Create tensor_type
     tensor_type = selected_tensor.Type()
@@ -268,7 +281,7 @@ def GenerateTensors(new_builder, selected_subgraph, used_tensors_dic, used_buffe
     for new_tensor in reversed(new_tensor_list):
         new_builder.PrependUOffsetTRelative(new_tensor)
 
-    return new_builder.EndVector(new_tensor_num)
+    return EndVector(new_builder, new_tensor_num)
 
 
 def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_type,
@@ -474,7 +487,7 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
             for new_shape_idx in reversed(range(shape_num)):
                 new_shape_val = reshape_option.NewShape(new_shape_idx)
                 new_builder.PrependInt32(new_shape_val)
-            new_shape = new_builder.EndVector(shape_num)
+            new_shape = EndVector(new_builder, shape_num)
 
         tflite.ReshapeOptions.ReshapeOptionsStart(new_builder)
         if shape_num != 0:
@@ -613,7 +626,7 @@ def GenerateBuiltinOption(new_builder, selected_builtin_option, builtin_option_t
             for squeeze_dims_idx in reversed(range(squeeze_dims_num)):
                 squeeze_dims_val = squeeze_option.SqueezeDims(squeeze_dims_idx)
                 new_builder.PrependInt32(squeeze_dims_val)
-            new_squeeze_dims = new_builder.EndVector(squeeze_dims_num)
+            new_squeeze_dims = EndVector(new_builder, squeeze_dims_num)
 
         tflite.SqueezeOptions.SqueezeOptionsStart(new_builder)
         if squeeze_dims_num != 0:
@@ -997,7 +1010,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
             else:
                 new_input_tensor_idx = used_tensors_dic[input_tensor_idx]
             new_builder.PrependInt32(new_input_tensor_idx)
-        new_input = new_builder.EndVector(input_num)
+        new_input = EndVector(new_builder, input_num)
 
     # create output_vector
     output_num = selected_operator.OutputsLength()
@@ -1007,7 +1020,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
             output_tensor_idx = selected_operator.Outputs(output_idx)
             new_output_tensor_idx = used_tensors_dic[output_tensor_idx]
             new_builder.PrependInt32(new_output_tensor_idx)
-        new_output = new_builder.EndVector(output_num)
+        new_output = EndVector(new_builder, output_num)
 
     # Create builtin_option
     builtin_option_type = selected_operator.BuiltinOptionsType()
@@ -1022,7 +1035,7 @@ def GenerateOperator(new_builder, selected_operator, used_tensors_dic, used_opco
         tflite.Operator.OperatorStartCustomOptionsVector(new_builder, custom_option_num)
         for custom_option_idx in reversed(range(custom_option_num)):
             new_builder.PrependUint8(selected_operator.CustomOptions(custom_option_idx))
-        new_custom_option = new_builder.EndVector(custom_option_num)
+        new_custom_option = EndVector(new_builder, custom_option_num)
 
     # Create custum option type
     custom_option_type = selected_operator.CustomOptionsFormat()
@@ -1067,7 +1080,7 @@ def GenerateOperators(new_builder, selected_subgraph, operator_list, used_tensor
     for new_operator in reversed(new_operator_list):
         new_builder.PrependUOffsetTRelative(new_operator)
 
-    return new_builder.EndVector(new_operator_num)
+    return EndVector(new_builder, new_operator_num)
 
 
 def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_tensor,
@@ -1085,7 +1098,7 @@ def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_te
         for input_tensor_idx in reversed(new_input_tensor):
             new_input_tensor_idx = used_tensors_dic[input_tensor_idx]
             new_builder.PrependInt32(new_input_tensor_idx)
-        new_inputs = new_builder.EndVector(new_input_tensor_num)
+        new_inputs = EndVector(new_builder, new_input_tensor_num)
 
     # Create output vector for subgraph table
     new_output_tensor_num = len(new_output_tensor)
@@ -1094,7 +1107,7 @@ def GenerateSubgraph(new_builder, selected_subgraph, operator_list, new_input_te
         for output_tensor_idx in reversed(new_output_tensor):
             new_output_tensor_idx = used_tensors_dic[output_tensor_idx]
             new_builder.PrependInt32(new_output_tensor_idx)
-        new_outputs = new_builder.EndVector(new_output_tensor_num)
+        new_outputs = EndVector(new_builder, new_output_tensor_num)
 
     # Operators
     operators = GenerateOperators(new_builder, selected_subgraph, operator_list,
@@ -1161,7 +1174,7 @@ def GenerateSubgraphs(args, new_builder, sample_model, operator_list, new_input_
     for subgraph_idx in reversed(range(new_subgraph_num)):
         new_builder.PrependUOffsetTRelative(new_subgraph_list[subgraph_idx])
 
-    return new_builder.EndVector(new_subgraph_num)
+    return EndVector(new_builder, new_subgraph_num)
 
 
 def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
@@ -1181,7 +1194,7 @@ def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
             tflite.Buffer.BufferStartDataVector(new_builder, buffer_length)
             for buffer_data_idx in reversed(range(buffer_length)):
                 new_builder.PrependUint8(buffer.Data(buffer_data_idx))
-            new_buffer = new_builder.EndVector(buffer_length)
+            new_buffer = EndVector(new_builder, buffer_length)
             new_buffer_data_list[buffer_idx] = new_buffer
 
     # Create tables of buffer
@@ -1205,7 +1218,7 @@ def GenerateBuffers(new_builder, sample_model, used_buffers_dic):
     for new_buffer_idx in reversed(range(new_buffer_num)):
         new_builder.PrependUOffsetTRelative(new_buffer_list[new_buffer_idx])
 
-    return new_builder.EndVector(new_buffer_num)
+    return EndVector(new_builder, new_buffer_num)
 
 
 def GenerateModel(args, new_builder, sample_model, operator_list, new_input_tensors,
diff --git a/tools/tflitefile_tool/subgraph_printer.py b/tools/tflitefile_tool/subgraph_printer.py
deleted file mode 100755 (executable)
index cce7ff5..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from operator_printer import OperatorPrinter
-from tensor_printer import TensorPrinter
-import graph_stats
-
-
-class SubgraphPrinter(object):
-    def __init__(self, verbose, op_parser, model_name):
-        self.verbose = verbose
-        self.op_parser = op_parser
-        self.model_name = model_name
-        self.print_all_tensor = True
-        self.print_tensor_index_list = None
-        self.print_all_operator = True
-        self.print_operator_index_list = None
-
-    def SetPrintSpecificTensors(self, tensor_indices):
-        if len(tensor_indices) != 0:
-            self.print_all_tensor = False
-            self.print_tensor_index_list = tensor_indices
-
-    def SetPrintSpecificOperators(self, operator_indices):
-        if len(operator_indices) != 0:
-            self.print_all_operator = False
-            self.print_operator_index_list = operator_indices
-
-    def PrintInfo(self):
-        if self.print_all_tensor == True and self.print_all_operator == True:
-            self.PrintModelInfo()
-            self.PrintAllOperatorsInList()
-            graph_stats.PrintGraphStats(
-                graph_stats.CalcGraphStats(self.op_parser), self.verbose)
-
-        if self.print_all_tensor == False:
-            print('')
-            self.PrintSpecificTensors(self.print_tensor_index_list)
-            print('')
-
-        if self.print_all_operator == False:
-            print('')
-            self.PrintSpecificOperators(self.print_operator_index_list)
-            print('')
-
-    def PrintModelInfo(self):
-        print("[" + self.model_name + "]\n")
-        if self.verbose > 0:
-            model_inputs = self.op_parser.tf_subgraph.InputsAsNumpy()
-            model_outputs = self.op_parser.tf_subgraph.OutputsAsNumpy()
-            print(self.model_name + " input tensors: " + str(model_inputs))
-            self.PrintSpecificTensors(model_inputs, "\t")
-            print(self.model_name + " output tensors: " + str(model_outputs))
-            self.PrintSpecificTensors(model_outputs, "\t")
-        print('')
-
-    def PrintAllOperatorsInList(self):
-        if (self.verbose < 1):
-            return
-
-        for operator in self.op_parser.operators_in_list:
-            printer = OperatorPrinter(self.verbose, operator)
-            printer.PrintInfo()
-            print('')
-
-        print('')
-
-    def PrintSpecificTensors(self, print_tensor_index_list, depth_str=""):
-        for tensor in self.op_parser.GetTensors(print_tensor_index_list):
-            printer = TensorPrinter(self.verbose, tensor)
-            printer.PrintInfo(depth_str)
-
-    def PrintSpecificOperators(self, print_operator_index_list):
-        for operator in self.op_parser.operators_in_list:
-            if operator.operator_idx in print_operator_index_list:
-                printer = OperatorPrinter(self.verbose, operator)
-                printer.PrintInfo()
diff --git a/tools/tflitefile_tool/tensor_printer.py b/tools/tflitefile_tool/tensor_printer.py
deleted file mode 100755 (executable)
index 108a119..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from tensor_wrapping import Tensor
-
-SYMBOLS = ['B', 'K', 'M', 'G', 'T']
-
-
-def ConvertBytesToHuman(n):
-    n = int(n)
-    if n < 0:
-        return 0
-
-    format_str = "%(val)3.1f%(symb)s"
-    prefix = {}
-    for i, s in enumerate(SYMBOLS[1:]):
-        prefix[s] = 1 << (i + 1) * 10
-
-    for symbol in reversed(SYMBOLS[1:]):
-        if n >= prefix[symbol]:
-            v = float(n) / prefix[symbol]
-            return format_str % dict(symb=symbol, val=v)
-
-    return format_str % dict(symb=SYMBOLS[0], val=n)
-
-
-class TensorPrinter(object):
-    def __init__(self, verbose, tensor):
-        self.verbose = verbose
-        self.tensor = tensor
-
-    def PrintInfo(self, depth_str=""):
-        if (self.verbose < 1):
-            pass
-
-        print_str = ""
-        if self.tensor.tensor_idx < 0:
-            print_str = "Tensor {0:4}".format(self.tensor.tensor_idx)
-        else:
-            buffer_idx = self.tensor.tf_tensor.Buffer()
-            buffer_str = "Empty" if buffer_idx == 0 else str(buffer_idx)
-            isEmpty = "Filled"
-            if (self.tensor.tf_buffer.DataLength() == 0):
-                isEmpty = " Empty"
-            shape_str = self.GetShapeString()
-            type_name = self.tensor.type_name
-
-            shape_name = ""
-            if self.tensor.tf_tensor.Name() != 0:
-                shape_name = self.tensor.tf_tensor.Name()
-
-            memory_size = ConvertBytesToHuman(self.tensor.memory_size)
-
-            print_str = "Tensor {0:4} : buffer {1:5} | {2} | {3:7} | Memory {4:6} | Shape {5} ({6})".format(
-                self.tensor.tensor_idx, buffer_str, isEmpty, type_name, memory_size,
-                shape_str, shape_name)
-        print(depth_str + print_str)
-
-    def GetShapeString(self):
-        if self.tensor.tf_tensor.ShapeLength() == 0:
-            return "Scalar"
-        return_string = "["
-        for shape_idx in range(self.tensor.tf_tensor.ShapeLength()):
-            if (shape_idx != 0):
-                return_string += ", "
-            # when shape signature is -1, that means unknown dim
-            if self.tensor.tf_tensor.ShapeSignature(shape_idx) != -1:
-                return_string += str(self.tensor.tf_tensor.Shape(shape_idx))
-            else:
-                return_string += "-1"
-        return_string += "]"
-        return return_string
diff --git a/tools/tflitefile_tool/tensor_wrapping.py b/tools/tflitefile_tool/tensor_wrapping.py
deleted file mode 100755 (executable)
index 2a6dcac..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/python
-
-# Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import tflite.Tensor
-import tflite.TensorType
-
-TensorTypeList = {}
-
-
-def SetTensorTypeStr():
-    tensorTypeObj = tflite.TensorType.TensorType()
-
-    for fieldName in dir(tensorTypeObj):
-        if (not fieldName.startswith('_')):
-            fieldValue = getattr(tensorTypeObj, fieldName)
-            if (isinstance(fieldValue, (int))):
-                TensorTypeList[fieldValue] = fieldName
-
-
-TYPES = {
-    'BOOL': 1,
-    'COMPLEX64': 8,
-    'FLOAT16': 2,
-    'FLOAT32': 4,
-    'INT16': 2,
-    'INT32': 4,
-    'INT64': 8,
-    'UINT8': 1
-}
-
-
-def GetTypeSize(type_name):
-    try:
-        return TYPES[type_name]
-
-    except KeyError as error:
-        return 0
-
-
-class Tensor(object):
-    def __init__(self, tensor_idx, tf_tensor, tf_buffer):
-        self.tensor_idx = tensor_idx
-        self.tf_tensor = tf_tensor
-        self.tf_buffer = tf_buffer
-
-        # optional input
-        if (self.tf_tensor != None):
-            self.type_name = TensorTypeList[self.tf_tensor.Type()]
-        else:
-            self.type_name = None
-
-        self.memory_size = self.GetMemorySize()
-
-    def GetMemorySize(self):
-        type_size = GetTypeSize(self.type_name)
-        if type_size == 0:
-            return 0
-
-        # memory size in bytes
-        size = int(type_size)
-        shape_length = self.tf_tensor.ShapeLength()
-        if shape_length == 0:
-            return size
-
-        for shape_idx in range(shape_length):
-            shape_size = int(self.tf_tensor.Shape(shape_idx))
-            size *= shape_size
-
-        return size
diff --git a/tools/tflitefile_tool/tests/README.md b/tools/tflitefile_tool/tests/README.md
new file mode 100644 (file)
index 0000000..0d1d707
--- /dev/null
@@ -0,0 +1,36 @@
+# How to test
+
+## Prepare
+
+There is `add.tflite` in `ONE/nnpackage/examples/v1.0.0/add`.
+
+```
+ONE$ find ./nnpackage -name "add.tflite"
+./nnpackage/examples/v1.0.0/add/add.tflite
+```
+
+## Test
+
+```
+ONE/tools/tflitefile_tool$ python -m unittest discover
+
+----------------------------------------------------------------------
+Ran 1 test in 0.000s
+
+OK
+```
+
+OR
+
+```
+ONE/tools/tflitefile_tool$ python ./tests/main.py
+
+----------------------------------------------------------------------
+Ran 1 test in 0.000s
+
+OK
+```
+
+## Reference
+
+https://docs.python.org/3.6/library/unittest.html
diff --git a/tools/tflitefile_tool/tests/__init__.py b/tools/tflitefile_tool/tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/tflitefile_tool/tests/main.py b/tools/tflitefile_tool/tests/main.py
new file mode 100644 (file)
index 0000000..b9c7104
--- /dev/null
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import unittest

if __name__ == '__main__':
    # Discover every test module below the current directory and run it
    # with the default text reporter.
    test_loader = unittest.TestLoader()
    suite = test_loader.discover('.')
    runner = unittest.runner.TextTestRunner()
    runner.run(suite)
diff --git a/tools/tflitefile_tool/tests/test_operator.py b/tools/tflitefile_tool/tests/test_operator.py
new file mode 100644 (file)
index 0000000..7d6fbe8
--- /dev/null
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.tensor import Tensor
+from ir.operator import Operator
+
+
+# Test only the getters/setters
+class OperatorTestCase(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_index(self):
+        op = Operator()
+        op.index = 1000
+        self.assertEqual(op.index, 1000)
+
+    def test_inputs(self):
+        op = Operator()
+        t0 = Tensor()
+        t0.index = 0
+        t1 = Tensor()
+        t1.index = 1
+        op.inputs = [t0, t1]
+        self.assertEqual(len(op.inputs), 2)
+        self.assertEqual(op.inputs[0], t0)
+        self.assertEqual(op.inputs[1], t1)
+
+    def test_outputs(self):
+        op = Operator()
+        t0 = Tensor()
+        t0.index = 0
+        t1 = Tensor()
+        t1.index = 1
+        op.outputs = [t0, t1]
+        self.assertEqual(len(op.outputs), 2)
+        self.assertEqual(op.outputs[0], t0)
+        self.assertEqual(op.outputs[1], t1)
+
+    def test_op_name(self):
+        op = Operator()
+        op.op_name = "ADD"
+        self.assertEqual(op.op_name, "ADD")
+
+    def test_activation(self):
+        op = Operator()
+        op.activation = "Tanh"
+        self.assertEqual(op.activation, "Tanh")
+
+    def test_options(self):
+        op = Operator()
+        op.options = "Options ..."
+        self.assertEqual(op.options, "Options ...")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_setup.py b/tools/tflitefile_tool/tests/test_setup.py
new file mode 100644 (file)
index 0000000..f38a2d6
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path
+import unittest
+
+# Python doesn't have const variables, but treat these as constants.
+# That means: DO NOT MODIFY these vars.
+THIS_FILE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_MODEL_DIR = os.path.join(THIS_FILE_DIR, "../../../nnpackage/examples/v1.0.0/add")
+TEST_MODEL_PATH = os.path.join(TEST_MODEL_DIR, "add.tflite")
+
+
+def Exist_TEST_MODEL_DIR(dir):
+    return os.path.exists(dir) and os.path.isdir(dir)
+
+
+def Exist_TEST_MODEL_FILE(file):
+    return os.path.exists(file) and os.path.isfile(file)
+
+
+class Setup(unittest.TestCase):
+    def test_Exist_TEST_MODEL_DIR(self):
+        model_dir = TEST_MODEL_DIR
+        self.assertTrue(Exist_TEST_MODEL_DIR(model_dir))
+
+    def test_Exist_TEST_MODEL_FILE(self):
+        model_file = TEST_MODEL_PATH
+        self.assertTrue(Exist_TEST_MODEL_FILE(model_file))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_string_builder.py b/tools/tflitefile_tool/tests/test_string_builder.py
new file mode 100644 (file)
index 0000000..97a5809
--- /dev/null
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from printer.string_builder import *
+
+
+class StringBuilderTestCase(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_ConvertBytesToHuman(self):
+        SYMBOLS = ['B', 'K', 'M', 'G', 'T']
+        format_str = "%(val)3.1f%(symb)s"
+
+        bytes = -1
+        self.assertEqual(ConvertBytesToHuman(bytes), 0)
+
+        bytes = 1
+        self.assertEqual(
+            ConvertBytesToHuman(bytes), format_str % dict(symb=SYMBOLS[0], val=(bytes)))
+
+        bytes = 1024
+        self.assertEqual(
+            ConvertBytesToHuman(bytes),
+            format_str % dict(symb=SYMBOLS[1], val=(bytes / 1024)))
+
+        bytes = 1024**2
+        self.assertEqual(
+            ConvertBytesToHuman(bytes),
+            format_str % dict(symb=SYMBOLS[2], val=(bytes / (1024**2))))
+
+        bytes = 1024**3
+        self.assertEqual(
+            ConvertBytesToHuman(bytes),
+            format_str % dict(symb=SYMBOLS[3], val=(bytes / (1024**3))))
+
+        bytes = 1024**4
+        self.assertEqual(
+            ConvertBytesToHuman(bytes),
+            format_str % dict(symb=SYMBOLS[4], val=(bytes / (1024**4))))
+
+    # TODO: More tests
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_subgraph.py b/tools/tflitefile_tool/tests/test_subgraph.py
new file mode 100644 (file)
index 0000000..7930ed0
--- /dev/null
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.subgraph import Subgraph
+from ir.operator import Operator
+from ir.tensor import Tensor
+
+
+# Test only the getters/setters
+class SubgraphTestCase(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_index(self):
+        subg = Subgraph()
+        subg.index = 1000
+        self.assertEqual(subg.index, 1000)
+
+    def test_inputs(self):
+        subg = Subgraph()
+        t0 = Tensor()
+        t0.index = 0
+        t1 = Tensor()
+        t1.index = 1
+        subg.inputs = [t0, t1]
+        self.assertEqual(len(subg.inputs), 2)
+        self.assertEqual(subg.inputs[0], t0)
+        self.assertEqual(subg.inputs[0].index, 0)
+        self.assertEqual(subg.inputs[1], t1)
+        self.assertEqual(subg.inputs[1].index, 1)
+
+    def test_outputs(self):
+        subg = Subgraph()
+        t0 = Tensor()
+        t0.index = 0
+        t1 = Tensor()
+        t1.index = 1
+        subg.outputs = [t0, t1]
+        self.assertEqual(len(subg.outputs), 2)
+        self.assertEqual(subg.outputs[0], t0)
+        self.assertEqual(subg.outputs[0].index, 0)
+        self.assertEqual(subg.outputs[1], t1)
+        self.assertEqual(subg.outputs[1].index, 1)
+
+    def test_subg_name(self):
+        subg = Subgraph()
+        subg.subg_name = "SUBGRAPH_0"
+        self.assertEqual(subg.subg_name, "SUBGRAPH_0")
+
+    def test_model_name(self):
+        subg = Subgraph()
+        subg.model_name = "SUBGRAPH_0"
+        self.assertEqual(subg.model_name, "SUBGRAPH_0")
+
+    def test_tensors_map(self):
+        subg = Subgraph()
+        t0 = Tensor()
+        t0.index = 0
+        t1 = Tensor()
+        t1.index = 1
+        subg.tensors_map[t0.index] = t0
+        subg.tensors_map[t1.index] = t1
+        self.assertEqual(len(subg.tensors_map.keys()), 2)
+        self.assertEqual(subg.tensors_map[t0.index], t0)
+        self.assertEqual(subg.tensors_map[t1.index], t1)
+
+    def test_operators_map(self):
+        subg = Subgraph()
+        op0 = Operator()
+        op0.index = 0
+        op0.op_name = "ADD"
+        op1 = Operator()
+        op1.index = 1
+        op1.op_name = "SUB"
+        subg.operators_map[op0.index] = op0
+        subg.operators_map[op1.index] = op1
+        self.assertEqual(len(subg.operators_map.keys()), 2)
+        self.assertEqual(subg.operators_map[op0.index], op0)
+        self.assertEqual(subg.operators_map[op1.index], op1)
+
+    def test_optypes_map(self):
+        subg = Subgraph()
+        op0 = Operator()
+        op0.index = 0
+        op0.op_name = "ADD"
+        op1 = Operator()
+        op1.index = 1
+        op1.op_name = "SUB"
+        op2 = Operator()
+        op2.index = 2
+        op2.op_name = "SUB"
+
+        subg.optypes_map[op0.op_name] = op0
+        subg.optypes_map[op1.op_name] = op1
+        subg.optypes_map[op2.op_name] = op2
+
+        self.assertEqual(len(subg.optypes_map.keys()), 2)
+        self.assertEqual(len(subg.optypes_map[op0.op_name]), 1)
+        self.assertEqual(len(subg.optypes_map[op2.op_name]), 2)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_tensor.py b/tools/tflitefile_tool/tests/test_tensor.py
new file mode 100644 (file)
index 0000000..200f495
--- /dev/null
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from ir.tensor import Tensor
+
+
+# Test only the getters/setters
+class TensorTestCase(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_index(self):
+        t = Tensor()
+        t.index = 1000
+        self.assertEqual(t.index, 1000)
+
+    def test_tensor_name(self):
+        t = Tensor()
+        t.tensor_name = "input"
+        self.assertEqual(t.tensor_name, "input")
+
+    def test_buffer(self):
+        t = Tensor()
+        o = object()
+        t.buffer = o
+        self.assertEqual(t.buffer, o)
+
+    def test_buffer_index(self):
+        t = Tensor()
+        t.buffer_index = 1000
+        self.assertEqual(t.buffer_index, 1000)
+
+    def test_type_name(self):
+        t = Tensor()
+        t.type_name = "FLOAT32"
+        self.assertEqual(t.type_name, "FLOAT32")
+
+    def test_shape(self):
+        t = Tensor()
+        t.shape = [1, 2, 3, 4]
+        self.assertEqual(t.shape, [1, 2, 3, 4])
+
+    def test_memory_size(self):
+        t = Tensor()
+        t.memory_size = 1000
+        self.assertEqual(t.memory_size, 1000)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/tflitefile_tool/tests/test_tflite_parser.py b/tools/tflitefile_tool/tests/test_tflite_parser.py
new file mode 100644 (file)
index 0000000..dd1447a
--- /dev/null
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import tflite.Model
+from parser.tflite.tflite_parser import TFLiteParser, TFLiteSubgraphParser
+from .test_setup import TEST_MODEL_PATH
+
+
+class TFLiteSubgraphParserTestCase(unittest.TestCase):
+    def setUp(self):
+        self.model_file = open(TEST_MODEL_PATH, 'rb')
+
+    def tearDown(self):
+        self.model_file.close()
+
+    def test_Parse(self):
+        buf = bytearray(self.model_file.read())
+        tf_model = tflite.Model.Model.GetRootAsModel(buf, 0)
+        for subgraph_index in range(tf_model.SubgraphsLength()):
+            tf_subgraph = tf_model.Subgraphs(subgraph_index)
+            subg_parser = TFLiteSubgraphParser(tf_model, subgraph_index)
+            subg = subg_parser.Parse()
+            self.assertEqual(subg.index, subgraph_index)
+            self.assertEqual(len(subg.inputs), tf_subgraph.InputsLength())
+            self.assertEqual(len(subg.outputs), tf_subgraph.OutputsLength())
+            # if there are optional tensors, this assert could be wrong
+            self.assertEqual(len(subg.tensors_map.keys()), tf_subgraph.TensorsLength())
+            self.assertEqual(
+                len(subg.operators_map.keys()), tf_subgraph.OperatorsLength())
+            # because the model at TEST_MODEL_PATH has only one op (ADD)
+            self.assertEqual(len(subg.optypes_map.keys()), tf_subgraph.OperatorsLength())
+
+
+class TFLiteParserTestCase(unittest.TestCase):
+    def setUp(self):
+        self.model_file = open(TEST_MODEL_PATH, 'rb')
+        self.parser = TFLiteParser(self.model_file)
+
+    def tearDown(self):
+        self.model_file.close()
+
+    def test_Parse(self):
+        subg_list = self.parser.Parse()
+        self.assertIsNotNone(subg_list)
+        self.assertEqual(len(subg_list), 1)
+
+
+if __name__ == '__main__':
+    unittest.main()