Imported Upstream version 1.18.0 accepted/tizen/unified/20211101.140244 submit/tizen/20211019.023737 submit/tizen/20211028.014856 upstream/1.18.0
authorChunseok Lee <chunseok.lee@samsung.com>
Tue, 19 Oct 2021 02:32:46 +0000 (11:32 +0900)
committerChunseok Lee <chunseok.lee@samsung.com>
Tue, 19 Oct 2021 02:32:46 +0000 (11:32 +0900)
504 files changed:
.ahub/tcchecker-tca/config.yaml
compiler/arser/include/arser/arser.h
compiler/arser/tests/arser.test.cpp
compiler/circle-opselector/CMakeLists.txt [new file with mode: 0644]
compiler/circle-opselector/README.md [new file with mode: 0644]
compiler/circle-opselector/driver/Driver.cpp [new file with mode: 0644]
compiler/circle-opselector/requires.cmake [new file with mode: 0644]
compiler/circle-opselector/src/Driver.test.cpp [new file with mode: 0644]
compiler/circle-opselector/src/Driver.test.h [new file with mode: 0644]
compiler/circle-opselector/src/ModuleIO.cpp [new file with mode: 0644]
compiler/circle-opselector/src/ModuleIO.h [new file with mode: 0644]
compiler/circle-opselector/src/ModuleIO.test.cpp [new file with mode: 0644]
compiler/circle-opselector/src/TestHelper.h [new file with mode: 0644]
compiler/circle-part-value-test/CMakeLists.txt
compiler/circle-partitioner/README.md
compiler/circle-quantizer/src/CircleQuantizer.cpp
compiler/circle2circle-dredd-recipe-test/test.lst
compiler/circle2circle/src/Circle2Circle.cpp
compiler/circledump/CMakeLists.txt
compiler/common-artifacts/CMakeLists.txt
compiler/enco/frontend/tflite/CMakeLists.txt
compiler/exo/CMakeLists.txt
compiler/luci-interpreter/CMakeLists.txt
compiler/luci-interpreter/README.md [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h [new file with mode: 0644]
compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
compiler/luci-interpreter/pal/linux/KernelsToBuild.lst [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALArgMax.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALConv2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALDepthToSpace.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALElu.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALL2Normalize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALL2Pool2D.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALLeakyRelu.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALLogSoftmax.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALMul.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALNeg.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALRelu.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALRelu6.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALResizeBilinear.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSlice.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSoftmax.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSplit.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/PALSub.h [new file with mode: 0644]
compiler/luci-interpreter/pal/linux/pal.cmake
compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALArgMax.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALConv2d.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALElu.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALL2Normalize.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALMul.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALNeg.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALSoftmax.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/PALSub.h [new file with mode: 0644]
compiler/luci-interpreter/pal/mcu/pal.cmake
compiler/luci-interpreter/src/BuddyMemoryManager.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/CMakeLists.txt
compiler/luci-interpreter/src/Interpreter.cpp
compiler/luci-interpreter/src/SimpleMemoryManager.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/StaticMemoryManager.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/TestMemoryManager.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/core/CMakeLists.txt
compiler/luci-interpreter/src/core/Kernel.h
compiler/luci-interpreter/src/core/RuntimeGraph.cpp
compiler/luci-interpreter/src/core/RuntimeGraph.h
compiler/luci-interpreter/src/core/RuntimeModule.h
compiler/luci-interpreter/src/core/Tensor.cpp
compiler/luci-interpreter/src/kernels/Add.test.cpp
compiler/luci-interpreter/src/kernels/ArgMax.cpp
compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
compiler/luci-interpreter/src/kernels/AveragePool2D.h
compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
compiler/luci-interpreter/src/kernels/BatchToSpaceND.cpp
compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
compiler/luci-interpreter/src/kernels/CMakeLists.txt
compiler/luci-interpreter/src/kernels/Cast.test.cpp
compiler/luci-interpreter/src/kernels/Concatenation.cpp
compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
compiler/luci-interpreter/src/kernels/Conv2D.cpp
compiler/luci-interpreter/src/kernels/Conv2D.h
compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
compiler/luci-interpreter/src/kernels/DepthToSpace.cpp
compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
compiler/luci-interpreter/src/kernels/Div.cpp
compiler/luci-interpreter/src/kernels/Div.test.cpp
compiler/luci-interpreter/src/kernels/Elu.cpp
compiler/luci-interpreter/src/kernels/Elu.test.cpp
compiler/luci-interpreter/src/kernels/Equal.h
compiler/luci-interpreter/src/kernels/Equal.test.cpp
compiler/luci-interpreter/src/kernels/Exp.cpp
compiler/luci-interpreter/src/kernels/Exp.test.cpp
compiler/luci-interpreter/src/kernels/Floor.test.cpp
compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp
compiler/luci-interpreter/src/kernels/FullyConnected.cpp
compiler/luci-interpreter/src/kernels/FullyConnected.h
compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
compiler/luci-interpreter/src/kernels/Greater.h
compiler/luci-interpreter/src/kernels/Greater.test.cpp
compiler/luci-interpreter/src/kernels/GreaterEqual.h
compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
compiler/luci-interpreter/src/kernels/If.cpp
compiler/luci-interpreter/src/kernels/If.test.cpp
compiler/luci-interpreter/src/kernels/InstanceNorm.test.cpp
compiler/luci-interpreter/src/kernels/L2Normalize.cpp
compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
compiler/luci-interpreter/src/kernels/Less.h
compiler/luci-interpreter/src/kernels/Less.test.cpp
compiler/luci-interpreter/src/kernels/LessEqual.h
compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
compiler/luci-interpreter/src/kernels/LogSoftmax.cpp
compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
compiler/luci-interpreter/src/kernels/LogicalAnd.test.cpp
compiler/luci-interpreter/src/kernels/LogicalNot.test.cpp
compiler/luci-interpreter/src/kernels/LogicalOr.cpp
compiler/luci-interpreter/src/kernels/LogicalOr.test.cpp
compiler/luci-interpreter/src/kernels/Logistic.cpp
compiler/luci-interpreter/src/kernels/Logistic.test.cpp
compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
compiler/luci-interpreter/src/kernels/Maximum.test.cpp
compiler/luci-interpreter/src/kernels/Mean.cpp
compiler/luci-interpreter/src/kernels/Mean.h
compiler/luci-interpreter/src/kernels/Mean.test.cpp
compiler/luci-interpreter/src/kernels/Minimum.test.cpp
compiler/luci-interpreter/src/kernels/MirrorPad.cpp
compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Mul.cpp
compiler/luci-interpreter/src/kernels/Mul.test.cpp
compiler/luci-interpreter/src/kernels/Neg.cpp
compiler/luci-interpreter/src/kernels/Neg.test.cpp
compiler/luci-interpreter/src/kernels/NotEqual.h
compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
compiler/luci-interpreter/src/kernels/PRelu.cpp
compiler/luci-interpreter/src/kernels/PRelu.h
compiler/luci-interpreter/src/kernels/PRelu.test.cpp
compiler/luci-interpreter/src/kernels/Pack.test.cpp
compiler/luci-interpreter/src/kernels/Pad.cpp
compiler/luci-interpreter/src/kernels/Pad.test.cpp
compiler/luci-interpreter/src/kernels/PadV2.cpp
compiler/luci-interpreter/src/kernels/PadV2.test.cpp
compiler/luci-interpreter/src/kernels/Pow.test.cpp
compiler/luci-interpreter/src/kernels/Relu.cpp
compiler/luci-interpreter/src/kernels/Relu.test.cpp
compiler/luci-interpreter/src/kernels/Relu6.cpp
compiler/luci-interpreter/src/kernels/Relu6.test.cpp
compiler/luci-interpreter/src/kernels/Reshape.test.cpp
compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp
compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp
compiler/luci-interpreter/src/kernels/Slice.cpp
compiler/luci-interpreter/src/kernels/Slice.test.cpp
compiler/luci-interpreter/src/kernels/Softmax.cpp
compiler/luci-interpreter/src/kernels/Softmax.test.cpp
compiler/luci-interpreter/src/kernels/SpaceToBatchND.cpp
compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
compiler/luci-interpreter/src/kernels/Split.cpp
compiler/luci-interpreter/src/kernels/Split.test.cpp
compiler/luci-interpreter/src/kernels/SplitV.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/SplitV.h [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/SplitV.test.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/kernels/Sqrt.test.cpp
compiler/luci-interpreter/src/kernels/Square.test.cpp
compiler/luci-interpreter/src/kernels/SquaredDifference.test.cpp
compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
compiler/luci-interpreter/src/kernels/StridedSlice.cpp
compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
compiler/luci-interpreter/src/kernels/Sub.cpp
compiler/luci-interpreter/src/kernels/Sub.test.cpp
compiler/luci-interpreter/src/kernels/Tanh.cpp
compiler/luci-interpreter/src/kernels/Tanh.test.cpp
compiler/luci-interpreter/src/kernels/TestUtils.cpp
compiler/luci-interpreter/src/kernels/TestUtils.h
compiler/luci-interpreter/src/kernels/Transpose.cpp
compiler/luci-interpreter/src/kernels/Transpose.test.cpp
compiler/luci-interpreter/src/kernels/TransposeConv.cpp
compiler/luci-interpreter/src/kernels/TransposeConv.h
compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
compiler/luci-interpreter/src/kernels/Unpack.test.cpp
compiler/luci-interpreter/src/kernels/Utils.cpp
compiler/luci-interpreter/src/kernels/While.cpp
compiler/luci-interpreter/src/kernels/While.test.cpp
compiler/luci-interpreter/src/loader/CMakeLists.txt
compiler/luci-interpreter/src/loader/GraphLoader.cpp
compiler/luci-interpreter/src/loader/GraphLoader.h
compiler/luci-interpreter/src/loader/KernelBuilder.cpp
compiler/luci-interpreter/src/loader/KernelBuilder.h
compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
compiler/luci-interpreter/src/loader/KernelBuilderHelper.h
compiler/luci-interpreter/src/loader/ModuleLoader.cpp
compiler/luci-interpreter/src/loader/ModuleLoader.h
compiler/luci-interpreter/src/loader/nodes/Add.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Builders.h [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Cast.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Div.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Elu.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Equal.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Exp.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Floor.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Greater.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/If.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Less.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Logistic.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Maximum.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Mean.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Minimum.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Mul.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Neg.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/PRelu.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Pack.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Pad.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/PadV2.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Pow.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Relu.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Relu6.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Reshape.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Slice.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Softmax.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Split.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/SplitV.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Square.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Sub.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Tanh.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Transpose.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/Unpack.cpp [new file with mode: 0644]
compiler/luci-interpreter/src/loader/nodes/While.cpp [new file with mode: 0644]
compiler/luci-micro/CMakeLists.txt [new file with mode: 0644]
compiler/luci-micro/README.md [new file with mode: 0644]
compiler/luci-micro/requires.cmake [new file with mode: 0644]
compiler/luci-micro/standalone/CMakeLists.txt [new file with mode: 0644]
compiler/luci-micro/standalone/Toolchain.cmake [new file with mode: 0644]
compiler/luci-pass-value-test/CMakeLists.txt
compiler/luci-value-test/CMakeLists.txt
compiler/luci-value-test/README.md
compiler/luci-value-test/luci_eval_verifier.py
compiler/luci/CMakeLists.txt
compiler/luci/env/CMakeLists.txt
compiler/luci/env/include/luci/UserSettings.h
compiler/luci/env/src/UserSettings.cpp
compiler/luci/env/src/UserSettings.test.cpp
compiler/luci/export/CMakeLists.txt
compiler/luci/export/src/CircleExportMetadata.cpp
compiler/luci/export/src/CircleOperationExporter.cpp
compiler/luci/export/src/SerializedData.h
compiler/luci/import/CMakeLists.txt
compiler/luci/import/src/CircleImportMetadata.cpp
compiler/luci/import/src/CircleImportMetadata.h
compiler/luci/import/src/Importer.cpp
compiler/luci/lang/CMakeLists.txt
compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h [new file with mode: 0644]
compiler/luci/log/CMakeLists.txt
compiler/luci/logex/CMakeLists.txt
compiler/luci/partition/CMakeLists.txt
compiler/luci/pass/CMakeLists.txt
compiler/luci/pass/include/luci/CircleOptimizer.h
compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h [new file with mode: 0644]
compiler/luci/pass/include/luci/Pass/QuantizeDequantizeWeightsPass.h
compiler/luci/pass/include/luci/Pass/QuantizeWithMinMaxPass.h
compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h [new file with mode: 0644]
compiler/luci/pass/src/CircleOptimizer.cpp
compiler/luci/pass/src/CircleOptimizer.test.cpp
compiler/luci/pass/src/ConvertNCHWToNHWCPass.cpp
compiler/luci/pass/src/ConvertNCHWToNHWCPass.test.cpp
compiler/luci/pass/src/ExpandBroadcastConstPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/ForceQuantParamPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/ForceQuantParamPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/PropagateQuantParamPass.cpp
compiler/luci/pass/src/QuantizeDequantizeWeightsPass.cpp
compiler/luci/pass/src/QuantizeWithMinMaxPass.cpp
compiler/luci/pass/src/QuantizedModelVerifier.test.cpp
compiler/luci/pass/src/ResolveCustomOpAddPass.cpp
compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.cpp
compiler/luci/pass/src/ResolveCustomOpBatchMatMulPass.test.cpp
compiler/luci/pass/src/ResolveCustomOpMatMulPass.cpp
compiler/luci/pass/src/ResolveCustomOpMaxPoolWithArgmaxPass.cpp
compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp [new file with mode: 0644]
compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp [new file with mode: 0644]
compiler/luci/pass/src/SubstituteSqueezeToReshapePass.cpp
compiler/luci/pass/src/VerifyQuantizedNodeChannelWiseGranularity.h
compiler/luci/pass/src/VerifyQuantizedNodeLayerWiseGranularity.h
compiler/luci/pass/src/VerifyQuantizedNodeS16Type.h
compiler/luci/pass/src/VerifyQuantizedNodeU8Type.h
compiler/luci/plan/CMakeLists.txt [new file with mode: 0644]
compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h [new file with mode: 0644]
compiler/luci/plan/src/CircleNodeExecutionPlan.cpp [new file with mode: 0644]
compiler/luci/profile/CMakeLists.txt
compiler/luci/requires.cmake
compiler/luci/service/CMakeLists.txt
compiler/luci/service/src/CircleTypeInferenceRule.cpp
compiler/mio-circle/CMakeLists.txt
compiler/mio-tflite/CMakeLists.txt
compiler/mio-tflite260/CMakeLists.txt [new file with mode: 0644]
compiler/mio-tflite260/README.md [new file with mode: 0644]
compiler/mio-tflite260/example.cpp [new file with mode: 0644]
compiler/mir/src/mir_tflite_importer/CMakeLists.txt
compiler/one-cmds/CMakeLists.txt
compiler/one-cmds/conv_mixin_1.8.0.patch [deleted file]
compiler/one-cmds/how-to-use-one-commands.txt
compiler/one-cmds/one-codegen
compiler/one-cmds/one-prepare-venv
compiler/one-cmds/one-profile
compiler/one-cmds/one-quantize
compiler/one-cmds/tests/one-import_neg_002.test
compiler/one-cmds/tests/one-import_neg_006.test
compiler/one-cmds/tests/one-quantize_005.test [new file with mode: 0644]
compiler/one-cmds/tests/one-quantize_006.test [new file with mode: 0644]
compiler/one-cmds/tests/one-quantize_neg_018.test [new file with mode: 0644]
compiler/one-cmds/tests/onecc_022.cfg [new file with mode: 0644]
compiler/one-cmds/tests/onecc_022.test [new file with mode: 0644]
compiler/one-cmds/tests/prepare_test_materials.sh
compiler/one-cmds/utils.py
compiler/pota-quantization-value-test/CMakeLists.txt
compiler/pota-quantization-value-test/requires.cmake
compiler/tfl-inspect/CMakeLists.txt
compiler/tfl-inspect/requires.cmake
compiler/tfl-inspect/src/Reader.cpp
compiler/tfl-inspect/src/Reader.h
compiler/tfl-verify/CMakeLists.txt
compiler/tfl-verify/requires.cmake
compiler/tflchef/CMakeLists.txt
compiler/tflchef/core/CMakeLists.txt
compiler/tflchef/core/src/CustomOp/AddV2.cpp
compiler/tflchef/core/src/CustomOp/All.cpp
compiler/tflchef/core/src/CustomOp/BatchMatMulV2.cpp
compiler/tflchef/core/src/CustomOp/BroadcastTo.cpp
compiler/tflchef/core/src/CustomOp/MatMul.cpp
compiler/tflchef/core/src/CustomOp/MatrixBandPart.cpp
compiler/tflchef/core/src/CustomOp/MaxPoolWithArgmax.cpp
compiler/tflchef/core/src/ModelChef.cpp
compiler/tflchef/requires.cmake
compiler/tflchef/tflite/CMakeLists.txt
compiler/tflchef/tflite/src/TFliteImport.cpp
compiler/tflchef/tflite/src/TFliteImport.h
compiler/tfldump/CMakeLists.txt
compiler/tfldump/requires.cmake
compiler/tfldump/src/Dump.cpp
compiler/tfldump/src/Read.cpp
compiler/tfldump/src/Read.h
compiler/tflite2circle/CMakeLists.txt
compiler/tflite2circle/driver/Driver.cpp
compiler/tflite2circle/include/CircleModel.h
compiler/tflite2circle/include/TFLModel.h
compiler/tflite2circle/requires.cmake
compiler/tflite2circle/src/CircleModel.cpp
compiler/tflite2circle/src/TFLModel.cpp
compiler/vconone/CMakeLists.txt
compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl [deleted file]
docs/conf.py
docs/release/1.18/index.rst [new file with mode: 0644]
docs/release/1.18/release-note-1.18.0.md [new file with mode: 0644]
infra/cmake/modules/ExternalSourceTools.cmake
infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/FlatBuffersConfig.cmake
infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake [new file with mode: 0644]
infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake [new file with mode: 0644]
infra/debian/compiler/changelog
infra/debian/compiler/one-compiler.install
infra/debian/compiler/one-compiler.links
infra/debian/compiler/rules
infra/debian/runtime/changelog
infra/nncc/CMakeLists.txt
infra/nncc/command/utcount
infra/packaging/build
infra/packaging/preset/20210910 [new file with mode: 0644]
infra/packaging/preset/20210910_windows [new file with mode: 0644]
infra/packaging/res/tf2nnpkg.20210910 [new file with mode: 0644]
infra/scripts/compiler_modules.sh
infra/scripts/docker_collect_nnpkg_resources.sh
packaging/nnfw.spec
res/TensorFlowLiteRecipes/PadV2_001/test.recipe [new file with mode: 0644]
res/TensorFlowLiteRecipes/PadV2_001/test.rule [new file with mode: 0644]
res/TensorFlowLiteSchema/2.6.0/schema.fbs [new file with mode: 0644]
res/TensorFlowLiteSchema/SCHEMA.lst
runtime/contrib/android/api/build.gradle
runtime/libs/ndarray/CMakeLists.txt [new file with mode: 0644]
runtime/libs/ndarray/example/CMakeLists.txt [new file with mode: 0644]
runtime/libs/ndarray/example/example_array.cpp [new file with mode: 0644]
runtime/libs/ndarray/example/example_no_array.cpp [new file with mode: 0644]
runtime/libs/ndarray/include/ndarray/Array.h [new file with mode: 0644]
runtime/libs/ndarray/include/ndarray/Common.h [new file with mode: 0644]
runtime/libs/ndarray/include/ndarray/ContiguousSpan.h [new file with mode: 0644]
runtime/libs/ndarray/include/ndarray/Shape.h [new file with mode: 0644]
runtime/libs/ndarray/src/Array.cpp [new file with mode: 0644]
runtime/libs/ndarray/src/ContiguousSpan.cpp [new file with mode: 0644]
runtime/libs/ndarray/src/detail/cxx14.h [new file with mode: 0644]
runtime/libs/ndarray/test/CMakeLists.txt [new file with mode: 0644]
runtime/libs/ndarray/test/ndarray_test.cpp [new file with mode: 0644]
runtime/onert/api/include/nnfw_version.h
runtime/onert/backend/cpu/CMakeLists.txt
runtime/onert/backend/cpu/KernelGenerator.cc
runtime/onert/backend/cpu/KernelGenerator.h
runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc [new file with mode: 0644]
runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h [new file with mode: 0644]
runtime/onert/core/include/compiler/StaticShapeInferer.h
runtime/onert/core/include/exec/DynamicShapeInferer.h
runtime/onert/core/include/ir/Operations.Include.h
runtime/onert/core/include/ir/Operations.lst
runtime/onert/core/include/ir/operation/DetectionPostProcess.h [new file with mode: 0644]
runtime/onert/core/src/compiler/StaticShapeInferer.cc
runtime/onert/core/src/exec/DynamicShapeInferer.cc
runtime/onert/core/src/ir/OperationValidator.cc
runtime/onert/core/src/ir/OperationValidator.h
runtime/onert/core/src/ir/operation/DetectionPostProcess.cc [new file with mode: 0644]
runtime/onert/frontend/base_loader/include/base_loader.h
tests/nnfw_api/src/CircleGen.cc
tests/nnfw_api/src/CircleGen.h
tests/nnfw_api/src/one_op_tests/ArgMinMax.cc
tests/nnfw_api/src/one_op_tests/AveragePool2D.cc
tests/nnfw_api/src/one_op_tests/Concat.cc
tests/nnfw_api/src/one_op_tests/DepthToSpace.cc
tests/nnfw_api/src/one_op_tests/DepthwiseConv2D.cc
tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc [new file with mode: 0644]
tests/nnfw_api/src/one_op_tests/Pad.cc
tests/nnfw_api/src/one_op_tests/Slice.cc
tests/nnfw_api/src/one_op_tests/Softmax.cc
tools/release_tool/onert_version.sh

index 9c0a8d8..86d272d 100644 (file)
@@ -25,21 +25,6 @@ test:
         any: true
       - extension: cc
         any: true
-      - excludes :
-        - DepthwiseConv2D.cc
-        - ArgMinMax.cc
-        - AveragePool2D.cc
-        - Concat.cc
-        - DepthToSpace.cc
-        - DepthwiseConv2D.cc
-        - Fill.cc
-        - If.cc
-        - Pad.cc
-        - Reduce.cc
-        - ResizeBilinear.c
-        - Slice.cc
-        - Softmax.cc
-        - While.cc
     testCase:
       - condition:
         - functionName:
index f2a7a2b..1703e42 100644 (file)
@@ -238,6 +238,18 @@ public:
     return *this;
   }
 
+  Argument &accumulated(void)
+  {
+    _is_accumulated = true;
+    return *this;
+  }
+
+  Argument &accumulated(bool value)
+  {
+    _is_accumulated = value;
+    return *this;
+  }
+
   Argument &help(std::string help_message)
   {
     _help_message = help_message;
@@ -296,7 +308,9 @@ private:
   std::function<void(void)> _func;
   uint32_t _nargs{1};
   bool _is_required{false};
+  bool _is_accumulated{false};
   std::vector<std::string> _values;
+  std::vector<std::vector<std::string>> _accum_values;
 
   friend class Arser;
   friend std::ostream &operator<<(std::ostream &, const Arser &);
@@ -403,6 +417,8 @@ public:
         throw std::runtime_error("Invalid arguments. Positional argument must always be required.");
       }
     }
+    // TODO accumulated arguments shouldn't be enabled to positional arguments.
+    // TODO accumulated arguments shouldn't be enabled to optional arguments whose `narg` == 0.
   }
 
   void parse(int argc, char **argv)
@@ -475,6 +491,11 @@ public:
                                      "You must have missed some argument.");
           arg->second->_values.emplace_back(argv[c++]);
         }
+        // accumulate values
+        if (arg->second->_is_accumulated)
+        {
+          arg->second->_accum_values.emplace_back(arg->second->_values);
+        }
         if (arg->second->_nargs == 0)
         {
           // TODO std::boolalpha for true or false
@@ -493,6 +514,9 @@ public:
     if (arg == _arg_map.end())
       return false;
 
+    if (arg->second->_is_accumulated)
+      return arg->second->_accum_values.size() > 0 ? true : false;
+
     return arg->second->_values.size() > 0 ? true : false;
   }
 
@@ -500,6 +524,9 @@ public:
 
   template <typename T> std::vector<T> get_impl(const std::string &arg_name, std::vector<T> *);
 
+  template <typename T>
+  std::vector<std::vector<T>> get_impl(const std::string &arg_name, std::vector<std::vector<T>> *);
+
   template <typename T> T get(const std::string &arg_name);
 
   friend std::ostream &operator<<(std::ostream &stream, const Arser &parser)
@@ -617,6 +644,12 @@ template <typename T> T Arser::get_impl(const std::string &arg_name, T *)
                              "There is no argument you are looking for: " +
                              arg_name);
 
+  if (arg->second->_is_accumulated)
+    throw std::runtime_error(
+      "Type mismatch. "
+      "You called get using a type different from the one you specified."
+      "Accumulated argument is returned as std::vector of the specified type");
+
   if (arg->second->_type != TypeName<T>::Get())
     throw std::runtime_error("Type mismatch. "
                              "You called get() method with a type different "
@@ -640,6 +673,22 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name
                              "There is no argument you are looking for: " +
                              arg_name);
 
+  // Accumulated arguments with scalar type (e.g., STR)
+  if (arg->second->_is_accumulated)
+  {
+    if (arg->second->_type != TypeName<T>::Get())
+      throw std::runtime_error("Type mismatch. "
+                               "You called get using a type different from the one you specified.");
+
+    std::vector<T> data;
+    for (auto values : arg->second->_accum_values)
+    {
+      assert(values.size() == 1);
+      data.emplace_back(internal::lexical_cast<T>(values[0]));
+    }
+    return data;
+  }
+
   if (arg->second->_type != TypeName<std::vector<T>>::Get())
     throw std::runtime_error("Type mismatch. "
                              "You called get using a type different from the one you specified.");
@@ -650,6 +699,39 @@ template <typename T> std::vector<T> Arser::get_impl(const std::string &arg_name
   return data;
 }
 
+// Accumulated arguments with vector type (e.g., STR_VEC)
+template <typename T>
+std::vector<std::vector<T>> Arser::get_impl(const std::string &arg_name,
+                                            std::vector<std::vector<T>> *)
+{
+  auto arg = _arg_map.find(arg_name);
+  if (arg == _arg_map.end())
+    throw std::runtime_error("Invalid argument. "
+                             "There is no argument you are looking for: " +
+                             arg_name);
+
+  if (not arg->second->_is_accumulated)
+    throw std::runtime_error("Type mismatch. "
+                             "You called get using a type different from the one you specified.");
+
+  if (arg->second->_type != TypeName<std::vector<T>>::Get())
+    throw std::runtime_error(
+      "Type mismatch. "
+      "You called get using a type different from the one you specified."
+      "Accumulated argument is returned as std::vector of the specified type");
+
+  std::vector<std::vector<T>> result;
+  for (auto values : arg->second->_accum_values)
+  {
+    std::vector<T> data;
+    std::transform(values.begin(), values.end(), std::back_inserter(data),
+                   [](std::string str) -> T { return internal::lexical_cast<T>(str); });
+    result.emplace_back(data);
+  }
+
+  return result;
+}
+
 template <typename T> T Arser::get(const std::string &arg_name)
 {
   return get_impl(arg_name, static_cast<T *>(nullptr));
index b37d0de..4e88f0c 100644 (file)
@@ -93,7 +93,7 @@ TEST(BasicTest, OptionalArgument)
   EXPECT_THROW(arser.get<bool>("--volume"), std::runtime_error);
 }
 
-TEST(BasicTest, NonRequiredOptionalArgument)
+TEST(BasicTest, NonRequiredOptionalArgument_NEG)
 {
   /* arrange */
   Arser arser;
@@ -111,7 +111,7 @@ TEST(BasicTest, NonRequiredOptionalArgument)
   EXPECT_THROW(arser.get<int>("--weight"), std::runtime_error);
 }
 
-TEST(BasicTest, RequiredOptionalArgument)
+TEST(BasicTest, RequiredOptionalArgument_NEG)
 {
   /* arrange */
   Arser arser;
@@ -395,7 +395,7 @@ TEST(BasicTest, shortMultipleOption)
   EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
 }
 
-TEST(BasicTest, OptWithRequiredDuplicate)
+TEST(BasicTest, OptWithRequiredDuplicate_NEG)
 {
   /* arrange */
   Arser arser;
@@ -441,3 +441,61 @@ TEST(BasicTest, OptWithNonRequiredDuplicate)
   EXPECT_TRUE(arser["--output_path"]);
   EXPECT_EQ("I/am/out.put", arser.get<std::string>("--output_path"));
 }
+
+TEST(BasicTest, AccumulateVectorOptions)
+{
+  /* arrange */
+  Arser arser;
+
+  arser.add_argument("--specify").nargs(3).accumulated(true).type(arser::DataType::STR_VEC);
+
+  Prompt prompt("./driver --specify a b c --specify 1 2 3");
+  /* act */
+  arser.parse(prompt.argc(), prompt.argv());
+  /* assert */
+  EXPECT_TRUE(arser["--specify"]);
+
+  auto specify = arser.get<std::vector<std::vector<std::string>>>("--specify");
+  auto first = specify[0];
+  EXPECT_EQ("a", first.at(0));
+  EXPECT_EQ("b", first.at(1));
+  EXPECT_EQ("c", first.at(2));
+  auto second = specify[1];
+  EXPECT_EQ("1", second.at(0));
+  EXPECT_EQ("2", second.at(1));
+  EXPECT_EQ("3", second.at(2));
+}
+
+TEST(BasicTest, AccumulateScalarOptions)
+{
+  /* arrange */
+  Arser arser;
+
+  arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
+
+  Prompt prompt("./driver --specify 1 --specify 2");
+  /* act */
+  arser.parse(prompt.argc(), prompt.argv());
+  /* assert */
+  EXPECT_TRUE(arser["--specify"]);
+
+  auto specify = arser.get<std::vector<float>>("--specify");
+  EXPECT_EQ(1, specify.at(0));
+  EXPECT_EQ(2, specify.at(1));
+}
+
+TEST(BasicTest, AccumulateScalarOptions_WrongType_NEG)
+{
+  /* arrange */
+  Arser arser;
+
+  arser.add_argument("--specify").nargs(1).accumulated(true).type(arser::DataType::FLOAT);
+
+  Prompt prompt("./driver --specify 1 --specify 2");
+  /* act */
+  arser.parse(prompt.argc(), prompt.argv());
+  /* assert */
+  EXPECT_TRUE(arser["--specify"]);
+
+  EXPECT_THROW(arser.get<float>("--specify"), std::runtime_error);
+}
diff --git a/compiler/circle-opselector/CMakeLists.txt b/compiler/circle-opselector/CMakeLists.txt
new file mode 100644 (file)
index 0000000..93ab84c
--- /dev/null
@@ -0,0 +1,36 @@
+set(DRIVER "driver/Driver.cpp")
+
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+file(GLOB_RECURSE TESTS "src/*.test.cpp")
+list(REMOVE_ITEM SOURCES ${TESTS})
+
+add_executable(circle-opselector ${DRIVER} ${SOURCES})
+target_include_directories(circle-opselector PRIVATE src)
+target_link_libraries(circle-opselector foder)
+target_link_libraries(circle-opselector safemain)
+target_link_libraries(circle-opselector loco)
+target_link_libraries(circle-opselector luci_import)
+target_link_libraries(circle-opselector luci_export)
+target_link_libraries(circle-opselector arser)
+target_link_libraries(circle-opselector vconone)
+target_link_libraries(circle-opselector luci_service)
+target_link_libraries(circle-opselector luci_profile)
+
+install(TARGETS circle-opselector DESTINATION bin)
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(circle-opselector-test ${TESTS} ${SOURCES} ${DRIVER})
+target_include_directories(circle-opselector-test PRIVATE src)
+target_link_libraries(circle-opselector-test foder)
+target_link_libraries(circle-opselector-test loco)
+target_link_libraries(circle-opselector-test luci_import)
+target_link_libraries(circle-opselector-test luci_export)
+target_link_libraries(circle-opselector-test arser)
+target_link_libraries(circle-opselector-test vconone)
+target_link_libraries(circle-opselector-test luci_service)
+target_link_libraries(circle-opselector-test luci_profile)
diff --git a/compiler/circle-opselector/README.md b/compiler/circle-opselector/README.md
new file mode 100644 (file)
index 0000000..c06899a
--- /dev/null
@@ -0,0 +1,21 @@
+# circle-opselector\r
+\r
+`circle-opselector` is a tool for creating new circle models by selecting nodes from a model.\r
+\r
+## Example\r
+\r
+### 1. Select from location numbers\r
+\r
+```bash\r
+./circle-opselector --by_id "1-3,5" input.circle output.circle\r
+```\r
+\r
+Then, output.circle which has node 1, 2, 3 and 5 will be created.\r
+\r
+### 2. Select from node names\r
+\r
+```bash\r
+./circle-opselector --by_name "Add_1,Sub_1,Concat_2" input.circle output.circle\r
+```\r
+\r
+Then, output.circle which has node Add_1, Sub_1 and Concat_2 will be created.\r
diff --git a/compiler/circle-opselector/driver/Driver.cpp b/compiler/circle-opselector/driver/Driver.cpp
new file mode 100644 (file)
index 0000000..a1ace4f
--- /dev/null
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <luci/Profile/CircleNodeID.h>
+
+#include <arser/arser.h>
+#include <vconone/vconone.h>
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+#include <numeric>
+#include <sstream>
+
+void print_version(void)
+{
+  std::cout << "circle-opselector version " << vconone::get_string() << std::endl;
+  std::cout << vconone::get_copyright() << std::endl;
+}
+
+std::vector<std::string> split_into_vector(const std::string &str, const char &delim)
+{
+  std::vector<std::string> ret;
+  std::istringstream is(str);
+  for (std::string item; std::getline(is, item, delim);)
+  {
+    ret.push_back(item);
+  }
+
+  // remove empty string
+  ret.erase(std::remove_if(ret.begin(), ret.end(), [](const std::string &s) { return s.empty(); }),
+            ret.end());
+
+  return ret;
+}
+
+bool is_number(const std::string &s)
+{
+  return !s.empty() && std::find_if(s.begin(), s.end(),
+                                    [](unsigned char c) { return !std::isdigit(c); }) == s.end();
+}
+
+bool is_number(const std::vector<std::string> &vec)
+{
+  for (const auto &s : vec)
+  {
+    if (not::is_number(s))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+/**
+ * @brief  Segmentation function for user's '--by_id' input
+ *
+ * @note   This function tokenizes the input data.
+ *         First, divide it by ',', and if a token has '-', divide it once more by '-'.
+ *         For example, if the user input is '12,34,56', it is divided into [12,34,56].
+ *         If the input is '1-2,34,56', it is divided into [[1,2],34,56].
+ *         '-' means a range, so input '2-7' means all integers between 2 and 7.
+ */
+std::vector<uint32_t> split_id_input(const std::string &str)
+{
+  std::vector<uint32_t> by_id;
+
+  // tokenize comma-separated string
+  auto colon_tokens = ::split_into_vector(str, ',');
+  if (colon_tokens.empty()) // input empty line like "".
+  {
+    std::cerr << "ERROR: Nothing was entered." << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  for (const auto &ctok : colon_tokens)
+  {
+    auto dash_tokens = ::split_into_vector(ctok, '-');
+    if (not::is_number(dash_tokens))
+    {
+      std::cerr << "ERROR: To select operator by id, please use these args: [0-9], '-', ','"
+                << std::endl;
+      exit(EXIT_FAILURE);
+    }
+    // convert string into integer
+    std::vector<uint32_t> int_tokens;
+    try
+    {
+      std::transform(dash_tokens.begin(), dash_tokens.end(), std::back_inserter(int_tokens),
+                     [](const std::string &str) { return static_cast<uint32_t>(std::stoi(str)); });
+    }
+    catch (const std::out_of_range &)
+    {
+      // if the input is a big integer like '123467891234', stoi throws this exception.
+      std::cerr << "ERROR: Argument is out of range." << std::endl;
+      exit(EXIT_FAILURE);
+    }
+    catch (...)
+    {
+      std::cerr << "ERROR: Unknown error" << std::endl;
+      exit(EXIT_FAILURE);
+    }
+
+    switch (int_tokens.size())
+    {
+      case 0: // inputs like "-"
+      {
+        std::cerr << "ERROR: Nothing was entered" << std::endl;
+        exit(EXIT_FAILURE);
+      }
+      case 1: // inputs like "1", "2"
+      {
+        by_id.push_back(int_tokens.at(0));
+        break;
+      }
+      case 2: // inputs like "1-2", "11-50"
+      {
+        for (uint32_t i = int_tokens.at(0); i <= int_tokens.at(1); i++)
+        {
+          by_id.push_back(i);
+        }
+        break;
+      }
+      default: // inputs like "1-2-3"
+      {
+        std::cerr << "ERROR: Too many '-' in str." << std::endl;
+        exit(EXIT_FAILURE);
+      }
+    }
+  }
+
+  return by_id;
+}
+
+std::vector<std::string> split_name_input(const std::string &str)
+{
+  return ::split_into_vector(str, ',');
+}
+
+int entry(int argc, char **argv)
+{
+  // TODO Add new option names!
+
+  arser::Arser arser("circle-opselector provides selecting operations in circle model");
+
+  arser.add_argument("--version")
+    .nargs(0)
+    .default_value(false)
+    .help("Show version information and exit")
+    .exit_with(print_version);
+
+  // TODO Add new options!
+
+  arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
+  arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
+
+  // select option
+  arser.add_argument("--by_id")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .help("Input operation id to select nodes.");
+  arser.add_argument("--by_name")
+    .nargs(1)
+    .type(arser::DataType::STR)
+    .help("Input operation name to select nodes.");
+
+  try
+  {
+    arser.parse(argc, argv);
+  }
+  catch (const std::runtime_error &err)
+  {
+    std::cerr << err.what() << std::endl;
+    std::cout << arser;
+    return EXIT_FAILURE;
+  }
+
+  std::string input_path = arser.get<std::string>("input");
+  std::string output_path = arser.get<std::string>("output");
+
+  std::string operator_input;
+
+  std::vector<uint32_t> by_id;
+  std::vector<std::string> by_name;
+
+  if (!arser["--by_id"] && !arser["--by_name"] || arser["--by_id"] && arser["--by_name"])
+  {
+    std::cerr << "ERROR: Either option '--by_id' or '--by_name' must be specified" << std::endl;
+    std::cerr << arser;
+    return EXIT_FAILURE;
+  }
+
+  if (arser["--by_id"])
+  {
+    operator_input = arser.get<std::string>("--by_id");
+    by_id = split_id_input(operator_input);
+  }
+  if (arser["--by_name"])
+  {
+    operator_input = arser.get<std::string>("--by_name");
+    by_name = split_name_input(operator_input);
+  }
+
+  // Import original circle file.
+  auto module = opselector::getModule(input_path);
+
+  // Select nodes from user input.
+  std::vector<const luci::CircleNode *> selected_nodes;
+
+  // put selected nodes into vector.
+  if (by_id.size())
+  {
+    loco::Graph *graph = module.get()->graph(0); // get main subgraph.
+
+    for (auto node : loco::all_nodes(graph))
+    {
+      auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+
+      try
+      {
+        auto node_id = luci::get_node_id(cnode); // if the node is not an operator, this throws std::runtime_error
+
+        for (auto selected_id : by_id)
+          if (selected_id == node_id) // find the selected id
+            selected_nodes.emplace_back(cnode);
+      }
+      catch (std::runtime_error)
+      {
+        continue;
+      }
+    }
+  }
+  if (by_name.size())
+  {
+    loco::Graph *graph = module.get()->graph(0); // get main subgraph.
+
+    for (auto node : loco::all_nodes(graph))
+    {
+      auto cnode = loco::must_cast<const luci::CircleNode *>(node);
+      std::string node_name = cnode->name();
+
+      for (auto selected_name : by_name)
+        if (selected_name.compare(node_name) == 0) // find the selected name
+          selected_nodes.emplace_back(cnode);
+    }
+  }
+  if (selected_nodes.size() == 0)
+  {
+    std::cerr << "ERROR: No operator selected" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  // TODO implement node selections
+
+  // Export to output Circle file
+  assert(opselector::exportModule(module.get(), output_path));
+
+  return 0;
+}
diff --git a/compiler/circle-opselector/requires.cmake b/compiler/circle-opselector/requires.cmake
new file mode 100644 (file)
index 0000000..dcdbcbb
--- /dev/null
@@ -0,0 +1,6 @@
+require("foder")
+require("loco")
+require("safemain")
+require("luci")
+require("arser")
+require("vconone")
diff --git a/compiler/circle-opselector/src/Driver.test.cpp b/compiler/circle-opselector/src/Driver.test.cpp
new file mode 100644 (file)
index 0000000..6e56908
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Driver.test.h"
+#include "TestHelper.h"
+
+#include <gtest/gtest.h>
+
+TEST(DriverTest, NoArg_NEG)
+{
+  Argv<1> argv;
+  argv.add("circle-opselector");
+
+  ::testing::internal::CaptureStderr();
+  ::testing::internal::CaptureStdout();
+  int result = entry(1, argv.argv());
+  ::testing::internal::GetCapturedStdout();
+  ASSERT_EQ(EXIT_FAILURE, result);
+}
+
+TEST(DriverTest, Wrong_ID_NEG)
+{
+  std::string str1 = "1";
+  std::string empty = "";
+  std::string no_integer = "1531538X5";
+
+  ASSERT_EQ(true, is_number(str1));
+  ASSERT_EQ(false, is_number(empty));
+  ASSERT_EQ(false, is_number(no_integer));
+}
+
+TEST(DriverTest, Split)
+{
+  std::vector<uint32_t> vec1;
+  std::vector<uint32_t> vec2;
+
+  std::string hyphen = "1-3,8-10";
+  std::string comma = "1,2,3";
+
+  vec1.push_back(1);
+  vec1.push_back(2);
+  vec1.push_back(3);
+  vec1.push_back(8);
+  vec1.push_back(9);
+  vec1.push_back(10);
+
+  vec2.push_back(1);
+  vec2.push_back(2);
+  vec2.push_back(3);
+
+  ASSERT_EQ(vec1, split_id_input(hyphen));
+  ASSERT_EQ(vec2, split_id_input(comma));
+}
diff --git a/compiler/circle-opselector/src/Driver.test.h b/compiler/circle-opselector/src/Driver.test.h
new file mode 100644 (file)
index 0000000..06f1516
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
+#define __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
+
+#include <vector>
+#include <string>
+
+int entry(int argc, char **argv);
+bool is_number(const std::string &s);
+std::vector<uint32_t> split_id_input(const std::string &str);
+
+#endif // __CIRCLE_OPSELECTOR_DRIVER_TEST_H__
diff --git a/compiler/circle-opselector/src/ModuleIO.cpp b/compiler/circle-opselector/src/ModuleIO.cpp
new file mode 100644 (file)
index 0000000..46f45ce
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <foder/FileLoader.h>
+
+#include <luci/Importer.h>
+#include <luci/CircleExporter.h>
+#include <luci/CircleFileExpContract.h>
+
+#include <iostream>
+
+namespace opselector
+{
+
+std::unique_ptr<luci::Module> getModule(std::string &input_path)
+{
+  // Load model from the file
+  foder::FileLoader file_loader{input_path};
+  std::vector<char> model_data = file_loader.load();
+
+  // Verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(model_data.data()), model_data.size()};
+  if (!circle::VerifyModelBuffer(verifier))
+  {
+    std::cerr << "ERROR: Invalid input file '" << input_path << "'" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  const circle::Model *circle_model = circle::GetModel(model_data.data());
+  if (circle_model == nullptr)
+  {
+    std::cerr << "ERROR: Failed to load circle '" << input_path << "'" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // Import from input Circle file
+  luci::Importer importer;
+
+  return importer.importModule(circle_model);
+}
+
+bool exportModule(luci::Module *module, std::string &output_path)
+{
+  luci::CircleExporter exporter;
+
+  luci::CircleFileExpContract contract(module, output_path);
+
+  if (!exporter.invoke(&contract))
+  {
+    std::cerr << "ERROR: Failed to export '" << output_path << "'" << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+} // namespace opselector
diff --git a/compiler/circle-opselector/src/ModuleIO.h b/compiler/circle-opselector/src/ModuleIO.h
new file mode 100644 (file)
index 0000000..39c704b
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_MODULEIO_H__
+#define __CIRCLE_OPSELECTOR_MODULEIO_H__
+
+#include <luci/IR/Module.h>
+
+#include <string>
+#include <memory>
+
+namespace opselector
+{
+
+std::unique_ptr<luci::Module> getModule(std::string &input_path);
+bool exportModule(luci::Module *module, std::string &output_path);
+
+} // namespace opselector
+
+#endif // __CIRCLE_OPSELECTOR_MODULEIO_H__
diff --git a/compiler/circle-opselector/src/ModuleIO.test.cpp b/compiler/circle-opselector/src/ModuleIO.test.cpp
new file mode 100644 (file)
index 0000000..a1e5c20
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleIO.h"
+
+#include <gtest/gtest.h>
+
+TEST(ModuleIOTest, Export_nullptr)
+{
+  std::string output_path = "./test.out.circle";
+
+  ASSERT_EQ(false, opselector::exportModule(nullptr, output_path));
+}
diff --git a/compiler/circle-opselector/src/TestHelper.h b/compiler/circle-opselector/src/TestHelper.h
new file mode 100644 (file)
index 0000000..966e2b2
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CIRCLE_OPSELECTOR_TEST_HELPER_H__
+#define __CIRCLE_OPSELECTOR_TEST_HELPER_H__
+
+#include <cassert>
+#include <string.h>
+
+template <size_t N> class Argv
+{
+public:
+  typedef char *pchar_t;
+
+public:
+  ~Argv()
+  {
+    for (size_t n = 0; n < _ptr; ++n)
+      delete _argv[n];
+  }
+
+  void add(const char *in)
+  {
+    assert(_ptr < N);
+    _argv[_ptr] = new char[strlen(in) + 1];
+    strncpy(_argv[_ptr], in, strlen(in) + 1);
+    _ptr++;
+  }
+
+  pchar_t *argv(void) { return _argv; }
+
+private:
+  pchar_t _argv[N] = {
+    nullptr,
+  };
+  size_t _ptr = 0;
+};
+
+#endif // __CIRCLE_OPSELECTOR_TEST_HELPER_H__
index b4b1b19..1cfbcbd 100644 (file)
@@ -106,7 +106,7 @@ add_dependencies(circle_part_value_test_prepare common_artifacts_deps)
 add_test(NAME circle_part_value_test
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/part_eval_all.sh"
           "${CMAKE_CURRENT_BINARY_DIR}"
-          "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+          "${NNCC_OVERLAY_DIR}/venv_2_6_0"
           "$<TARGET_FILE:circle_part_driver>"
           ${PARTITION_LIST}
 )
index e1a0258..5fd312e 100644 (file)
@@ -49,8 +49,8 @@ DIV=acl_cl
 - `backends`: Existing partition group names which nodes should be placed, in CSV format.
 - `default`: Default group name which should be one of `backends` item.
 - `comply`: How to group nodes of the model.
-   - currently `opcode` is supported
-   - future work: set group by node name or sequence number.
+   - currently `opcode` and `opname` are supported
+   - future work: set group by sequence number.
 
 ##### `[OPCODE`] section
 
index 5e717d0..1a09a8a 100644 (file)
@@ -43,6 +43,7 @@ void print_exclusive_options(void)
   std::cout << "    --quantize_dequantize_weights" << std::endl;
   std::cout << "    --quantize_with_minmax" << std::endl;
   std::cout << "    --requantize" << std::endl;
+  std::cout << "    --force_quantparam" << std::endl;
 }
 
 void print_version(void)
@@ -63,6 +64,7 @@ int entry(int argc, char **argv)
   const std::string qdqw = "--quantize_dequantize_weights";
   const std::string qwmm = "--quantize_with_minmax";
   const std::string rq = "--requantize";
+  const std::string fq = "--force_quantparam";
 
   const std::string gpd = "--generate_profile_data";
 
@@ -105,6 +107,15 @@ int entry(int argc, char **argv)
           "Two arguments required: input_dtype(int8) "
           "output_dtype(uint8)");
 
+  arser.add_argument(fq)
+    .nargs(3)
+    .type(arser::DataType::STR_VEC)
+    .required(false)
+    .accumulated(true)
+    .help("Write quantization parameters to the specified tensor. "
+          "Three arguments required: tensor_name(string), "
+          "scale(float) zero_point(int)");
+
   arser.add_argument("input").nargs(1).type(arser::DataType::STR).help("Input circle model");
   arser.add_argument("output").nargs(1).type(arser::DataType::STR).help("Output circle model");
 
@@ -123,10 +134,11 @@ int entry(int argc, char **argv)
   }
 
   {
-    // only one of qdqw, qwmm, rq option can be used
+    // only one of qdqw, qwmm, rq, fq option can be used
     int32_t opt_used = arser[qdqw] ? 1 : 0;
     opt_used += arser[qwmm] ? 1 : 0;
     opt_used += arser[rq] ? 1 : 0;
+    opt_used += arser[fq] ? 1 : 0;
     if (opt_used != 1)
     {
       print_exclusive_options();
@@ -185,6 +197,34 @@ int entry(int argc, char **argv)
     options->param(AlgorithmParameters::Quantize_output_dtype, values.at(1));
   }
 
+  if (arser[fq])
+  {
+    auto values = arser.get<std::vector<std::vector<std::string>>>(fq);
+
+    std::vector<std::string> tensors;
+    std::vector<std::string> scales;
+    std::vector<std::string> zero_points;
+
+    for (auto const value : values)
+    {
+      if (value.size() != 3)
+      {
+        std::cerr << arser;
+        return 255;
+      }
+
+      tensors.push_back(value[0]);
+      scales.push_back(value[1]);
+      zero_points.push_back(value[2]);
+    }
+
+    options->enable(Algorithms::ForceQuantParam);
+
+    options->params(AlgorithmParameters::Quantize_tensor_names, tensors);
+    options->params(AlgorithmParameters::Quantize_scales, scales);
+    options->params(AlgorithmParameters::Quantize_zero_points, zero_points);
+  }
+
   std::string input_path = arser.get<std::string>("input");
   std::string output_path = arser.get<std::string>("output");
 
index 95822c7..f41aac3 100644 (file)
@@ -41,6 +41,7 @@ Add(Net_Maximum_Minimum_000 PASS transform_min_max_to_relu6)
 Add(BatchMatMulV2_000 PASS resolve_customop_batchmatmul)
 Add(MatMul_000 PASS resolve_customop_matmul)
 Add(DepthwiseConv2D_003 PASS)
+Add(PadV2_001 PASS substitute_padv2_to_pad)
 Add(StridedSlice_003 PASS substitute_strided_slice_to_reshape)
 Add(MaxPoolWithArgmax_000 PASS resolve_customop_max_pool_with_argmax)
 Add(MaxPoolWithArgmax_001 PASS resolve_customop_max_pool_with_argmax)
index 1998b16..a5ddb26 100644 (file)
@@ -98,6 +98,12 @@ int entry(int argc, char **argv)
     .default_value(false)
     .help("This will fold dequantize op");
 
+  arser.add_argument("--fold_dwconv")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("This will fold Depthwise Convolution operator with constant inputs");
+
   arser.add_argument("--fold_sparse_to_dense")
     .nargs(0)
     .required(false)
@@ -116,6 +122,12 @@ int entry(int argc, char **argv)
     .default_value(false)
     .help("This will fuse Activation function to a preceding operator");
 
+  arser.add_argument("--fuse_add_with_fully_connected")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("This will fuse Add operator to FullyConnected operator");
+
   arser.add_argument("--fuse_add_with_tconv")
     .nargs(0)
     .required(false)
@@ -282,6 +294,12 @@ int entry(int argc, char **argv)
     .default_value(false)
     .help("This will convert certain condition PadV2 to Pad");
 
+  arser.add_argument("--substitute_splitv_to_split")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("This will convert certain condition SplitV to Split operator");
+
   arser.add_argument("--substitute_squeeze_to_reshape")
     .nargs(0)
     .required(false)
@@ -300,6 +318,12 @@ int entry(int argc, char **argv)
     .default_value(false)
     .help("This will convert single input Transpose to Reshape");
 
+  arser.add_argument("--expand_broadcast_const")
+    .nargs(0)
+    .required(false)
+    .default_value(false)
+    .help("This will expand broadcastable constant inputs");
+
   arser.add_argument("--convert_nchw_to_nhwc")
     .nargs(0)
     .required(false)
@@ -426,6 +450,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FoldCast);
   if (arser.get<bool>("--fold_dequantize"))
     options->enable(Algorithms::FoldDequantize);
+  if (arser.get<bool>("--fold_dwconv"))
+    options->enable(Algorithms::FoldDepthwiseConv2D);
   if (arser.get<bool>("--fold_sparse_to_dense"))
     options->enable(Algorithms::FoldSparseToDense);
   if (arser.get<bool>("--forward_reshape_to_unaryop"))
@@ -434,6 +460,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::FuseActivationFunction);
   if (arser.get<bool>("--fuse_batchnorm_with_conv"))
     options->enable(Algorithms::FuseBatchNormWithConv);
+  if (arser.get<bool>("--fuse_add_with_fully_connected"))
+    options->enable(Algorithms::FuseAddWithFullyConnected);
   if (arser.get<bool>("--fuse_add_with_tconv"))
     options->enable(Algorithms::FuseAddWithTConv);
   if (arser.get<bool>("--fuse_batchnorm_with_dwconv"))
@@ -486,6 +514,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::SubstitutePackToReshape);
   if (arser.get<bool>("--substitute_padv2_to_pad"))
     options->enable(Algorithms::SubstitutePadV2ToPad);
+  if (arser.get<bool>("--substitute_splitv_to_split"))
+    options->enable(Algorithms::SubstituteSplitVToSplit);
   if (arser.get<bool>("--substitute_squeeze_to_reshape"))
     options->enable(Algorithms::SubstituteSqueezeToReshape);
   if (arser.get<bool>("--substitute_strided_slice_to_reshape"))
@@ -496,6 +526,8 @@ int entry(int argc, char **argv)
     options->enable(Algorithms::TransformMinMaxToRelu6Pass);
   if (arser.get<bool>("--transform_min_relu_to_relu6"))
     options->enable(Algorithms::TransformMinReluToRelu6Pass);
+  if (arser.get<bool>("--expand_broadcast_const"))
+    options->enable(Algorithms::ExpandBroadcastConst);
 
   if (arser.get<bool>("--mute_warnings"))
     settings->set(luci::UserSettings::Key::MuteWarnings, true);
index 8ef6837..7848ac7 100644 (file)
@@ -11,6 +11,6 @@ target_include_directories(circledump PRIVATE include)
 target_link_libraries(circledump arser)
 target_link_libraries(circledump mio_circle)
 target_link_libraries(circledump safemain)
-target_link_libraries(circledump flatbuffers)
+target_link_libraries(circledump flatbuffers-1.10)
 
 install(TARGETS circledump DESTINATION bin)
index edca29b..6de634a 100644 (file)
@@ -17,6 +17,8 @@ set(VIRTUALENV_OVERLAY_TF_1_13_2 "${NNCC_OVERLAY_DIR}/venv_1_13_2")
 
 # Create python virtual environment with tensorflow 2.3.0
 set(VIRTUALENV_OVERLAY_TF_2_3_0 "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+# Create python virtual environment with tensorflow 2.6.0
+set(VIRTUALENV_OVERLAY_TF_2_6_0 "${NNCC_OVERLAY_DIR}/venv_2_6_0")
 
 add_custom_command(
   OUTPUT ${VIRTUALENV_OVERLAY_TF_1_13_2}
@@ -27,11 +29,16 @@ add_custom_command(
   OUTPUT ${VIRTUALENV_OVERLAY_TF_2_3_0}
   COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_3_0}
 )
+add_custom_command(
+  OUTPUT ${VIRTUALENV_OVERLAY_TF_2_6_0}
+  COMMAND ${PYTHON_EXECUTABLE} -m venv ${VIRTUALENV_OVERLAY_TF_2_6_0}
+)
 
 # Create requirements.txt and install required pip packages
 set(REQUIREMENTS_FILE "requirements.txt")
 set(REQUIREMENTS_OVERLAY_PATH_TF_1_13_2 "${VIRTUALENV_OVERLAY_TF_1_13_2}/${REQUIREMENTS_FILE}")
 set(REQUIREMENTS_OVERLAY_PATH_TF_2_3_0 "${VIRTUALENV_OVERLAY_TF_2_3_0}/${REQUIREMENTS_FILE}")
+set(REQUIREMENTS_OVERLAY_PATH_TF_2_6_0 "${VIRTUALENV_OVERLAY_TF_2_6_0}/${REQUIREMENTS_FILE}")
 
 # TODO remove version number of '--upgrade pip==20.2.1 setuptools==49.3.0'
 # NOTE adding version is for temporary hotfix of setuptools 50.x.y version
@@ -53,8 +60,23 @@ add_custom_command(
   DEPENDS ${VIRTUALENV_OVERLAY_TF_2_3_0}
 )
 
+add_custom_command(
+  OUTPUT ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+  COMMAND ${CMAKE_COMMAND} -E remove -f ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+  COMMAND ${CMAKE_COMMAND} -E echo "tensorflow-cpu==2.6.0" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+  COMMAND ${CMAKE_COMMAND} -E echo "flatbuffers==1.12" >> ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install --upgrade pip==20.2.1 setuptools==49.3.0
+  COMMAND ${VIRTUALENV_OVERLAY_TF_2_6_0}/bin/python -m pip --default-timeout=1000 install -r ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0} --upgrade
+  DEPENDS ${VIRTUALENV_OVERLAY_TF_2_6_0}
+)
+
 add_custom_target(common_artifacts_python_deps ALL
-  DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2} ${VIRTUALENV_OVERLAY_TF_2_3_0} ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2} ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+  DEPENDS ${VIRTUALENV_OVERLAY_TF_1_13_2}
+          ${VIRTUALENV_OVERLAY_TF_2_3_0}
+          ${VIRTUALENV_OVERLAY_TF_2_6_0}
+          ${REQUIREMENTS_OVERLAY_PATH_TF_1_13_2}
+          ${REQUIREMENTS_OVERLAY_PATH_TF_2_3_0}
+          ${REQUIREMENTS_OVERLAY_PATH_TF_2_6_0}
 )
 
 #[[ Generate common resources ]]
index ea10fbc..b2de2b3 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   return()
@@ -17,7 +17,7 @@ add_library(enco_tflite_frontend SHARED ${SOURCES})
 target_include_directories(enco_tflite_frontend PRIVATE src)
 target_link_libraries(enco_tflite_frontend enco_intf_frontend)
 target_link_libraries(enco_tflite_frontend enco_intf_cmdline)
-target_link_libraries(enco_tflite_frontend flatbuffers)
+target_link_libraries(enco_tflite_frontend flatbuffers-1.10)
 target_link_libraries(enco_tflite_frontend enco_tflite_schema)
 target_link_libraries(enco_tflite_frontend morph)
 target_link_libraries(enco_tflite_frontend cwrap)
index e686cbb..9d02f7c 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   message(STATUS "Build exo: FALSE (missing FlatBuffers)")
index ab4ec1f..1f7acee 100644 (file)
@@ -4,4 +4,12 @@ if (NOT LUCI_INTERPRETER_PAL_DIR)
     set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
 endif()
 
+set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+    set(LUCI_INTERPRETER_SUFFIX "")
+else()
+    set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
 add_subdirectory(src)
diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md
new file mode 100644 (file)
index 0000000..4a9a34e
--- /dev/null
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See `compiler/luci/lang` directory for details about IR.
+You can find useful infrastructure, like importer/exporter, optimizations in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in `luci-interpreter/include/luci_interpreter` directory
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+Interpreter object is reusable and can run multiple inferences.
+Elements in tensors (input/output/internal) are stored contiguously and have C-like layout:
+This means for tensor t=[[0, 1],[2, 3]], t[0,1] == 1.
+
+Input and output tensors have the same indexes as in original luci model. 
+
+**Usage example:**
+``` c++
+// Note getTensorSize is a function that computes tensor size,
+// it is not part of interpreter and should be implemented by user 
+
+luci_interpreter::Interpreter interpreter(luci_module);
+
+// Set inputs
+// assuming model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
+
+## Inspecting intermediate state
+
+Interpreter provides interfaces to investigate internal state of interpreter during inference.
+
+This is done by "observer" mechanism:
+- `Interpreter` class has `attachObserver` method, which takes pointer to `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods user can override to inject custom code
+
+ExecutionObserver provides three callbacks:
+- `postTensorWrite` checks contents of output tensor after operation execution
+- `preOperatorExecute` notifies that interpreter is going to execute operation
+- `postOperatorExecute` notifies that interpreter has finished execution of an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for this interface details.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+  void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+  {
+    if (tensor->element_type() != loco::DataType::FLOAT32)
+      return;
+    for (int i = 0; i < tensor->shape().num_elements(); ++i)
+      std::cout << tensor->data<float>[i] << ", ";
+  }
+
+  // User observer can override only needed methods,
+  // others will inherit empty implementation from base observer.
+
+  // void preOperatorExecute(const luci::CircleNode *node);
+  // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+Interpreter provides a handle for altering default memory management mechanisms.
+
+This is done by the `MemoryManager` interface, see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for implementation details.
+
+This header contains the `IMemoryManager` abstract class which is responsible for allocation and deallocation of tensors' memory.
+
+User can construct an interpreter with one of predefined memory managers or their own custom memory manager.
+Note that one memory manager could be shared between multiple interpreter instances, because an interpreter does not own the manager object. 
+
+List of predefined memory managers:
+- `SimpleMemoryManager` This is a simple wrapper around new/delete, default one.
+- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests.
+- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete.
+- `StaticMemoryManager` Uses precomputed memory allocation plan. Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs).
+
+**SimpleMemoryManager usage example:**
+
+No need to select anything, to use this memory manager.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool. That allows using static memory arrays for allocations.
+
+Limitations
+- Current implementation uses only lower power-of-two bytes of given buffer.
+
+  For example for 1000 bytes buffer, only lower 512 bytes will be used.
+- Current implementation can handle maximum 4 gigabyte memory pool
+
+``` c++
+  constexpr int buffer_size = 2048;
+  static uint8_t buffer[buffer_size];
+  luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+  luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
new file mode 100644 (file)
index 0000000..205baa6
--- /dev/null
@@ -0,0 +1,144 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+  BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+  struct Block
+  {
+    Block *next_free;
+    bool is_free;
+    uint32_t size;
+    // debug field
+    Block *self;
+  };
+
+  Block *_start_block;
+  int32_t _num_blocks;
+  uint32_t _size;
+  Block *_free_blocks[32]{};
+
+  static int32_t lowerLog2(uint32_t val)
+  {
+    int32_t i = 0;
+    while (val >>= 1)
+      i++;
+
+    return i;
+  }
+
+  void addToBlocks(Block *block, int32_t l)
+  {
+    if (!block)
+      return;
+
+    block->next_free = _free_blocks[l];
+    _free_blocks[l] = block;
+  }
+
+  void removeFromBlocks(const Block *block, int32_t l)
+  {
+    if (!block)
+      return;
+
+    Block *tmp = _free_blocks[l];
+
+    if (block == tmp)
+    {
+      _free_blocks[l] = block->next_free;
+      return;
+    }
+
+    while (tmp)
+    {
+      if (tmp->next_free == block)
+      {
+        tmp->next_free = block->next_free;
+        return;
+      }
+
+      tmp = tmp->next_free;
+    }
+  }
+
+  void divideBlock(Block *block, int32_t l)
+  {
+    int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+    removeFromBlocks(block, l);
+
+    // there is no need to add to the free_blocks list here
+    block->is_free = true;
+    block->size = size;
+    block->self = block;
+
+    Block *buddy;
+    buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+    buddy->is_free = true;
+    buddy->size = size;
+    buddy->self = buddy;
+
+    addToBlocks(buddy, l - 1);
+  }
+
+  Block *mergeBlock(Block *block)
+  {
+    Block *buddy;
+
+    const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+    const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+    buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+    if (!buddy->is_free || buddy->size != block->size)
+      return nullptr;
+
+    if (block > buddy)
+    {
+      Block *x = block;
+      block = buddy;
+      buddy = x;
+    }
+
+    removeFromBlocks(block, l);
+    removeFromBlocks(buddy, l);
+
+    block->size = block->size * 2 + sizeof(Block);
+    block->is_free = true;
+    block->self = block;
+
+    addToBlocks(block, l + 1);
+
+    return block;
+  }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
index 7a14bf6..7dee8a7 100644 (file)
@@ -22,6 +22,7 @@
 #include <luci/IR/Nodes/CircleInput.h>
 #include <luci/IR/Nodes/CircleOutput.h>
 
+#include "luci_interpreter/MemoryManager.h"
 #include <luci/IR/Module.h>
 
 #include <memory>
@@ -49,7 +50,7 @@ public:
 class Interpreter
 {
 public:
-  explicit Interpreter(const luci::Module *module);
+  explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr);
 
   ~Interpreter();
 
@@ -64,7 +65,11 @@ public:
   const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
 
 private:
+  // _default_memory_manager should be before _runtime_module due to
+  // the order of deletion in the destructor
+  std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
   std::unique_ptr<class RuntimeModule> _runtime_module;
+  IMemoryManager *_memory_manager = nullptr;
 
   // Observer functionality support.
   std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
diff --git a/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/MemoryManager.h
new file mode 100644 (file)
index 0000000..f32c520
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+namespace luci_interpreter
+{
+
+class IMemoryManager
+{
+public:
+  virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
+  virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
+
+  virtual ~IMemoryManager() = default;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
new file mode 100644 (file)
index 0000000..658a1c6
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager : public IMemoryManager
+{
+public:
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
new file mode 100644 (file)
index 0000000..ded7bde
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Used for allocations in static buffer, using offsets defined in luci model.
+class StaticMemoryManager : public IMemoryManager
+{
+public:
+  StaticMemoryManager() = delete;
+
+  explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
+  { /* Do nothing */
+  }
+
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+  // Stores a pointer to the beginning of the allocated memory buffer.
+  uint8_t *_buffer_ptr;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
new file mode 100644 (file)
index 0000000..397bbed
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory Manager for using in kernels tests. This eliminates the need to manually delete the
+// allocated memory in tests. This mem_manager remembers all its allocations and in destructor
+// delete all allocations.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+  void allocate_memory(luci_interpreter::Tensor &tensor) final;
+  void release_memory(luci_interpreter::Tensor &tensor) final;
+
+  ~TestMemoryManager() override
+  {
+    for (auto allocation : allocations)
+    {
+      delete[] allocation;
+    }
+  }
+
+private:
+  std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
index e356bce..bb9ff6d 100644 (file)
@@ -107,9 +107,6 @@ public:
     return _quantization.zero_point[0];
   }
 
-  void allocate();
-  void deallocate();
-
   const std::vector<float> &scales() const { return _quantization.scale; }
 
   const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
@@ -118,15 +115,16 @@ public:
 
   template <typename T> const T *data() const
   {
-    assert(_data_allocated);
-    return reinterpret_cast<const T *>(_data.get());
+    static_assert(std::is_same<uint8_t, char>::value or
+                  std::is_same<uint8_t, unsigned char>::value);
+    return reinterpret_cast<const T *>(_data);
   }
 
   template <typename T> T *data()
   {
-    if (!_data_allocated)
-      allocate();
-    return reinterpret_cast<T *>(_data.get());
+    static_assert(std::is_same<uint8_t, char>::value or
+                  std::is_same<uint8_t, unsigned char>::value);
+    return reinterpret_cast<T *>(_data);
   }
 
   const std::string &name() const { return _name; }
@@ -137,13 +135,50 @@ public:
 
   void resize(const Shape &new_shape);
 
+  void set_data_buffer(uint8_t *buffer)
+  {
+    if (buffer == nullptr)
+    {
+      _data_allocated = false;
+    }
+    else
+    {
+      _data_allocated = true;
+    }
+    _data = buffer;
+  }
+
+  bool is_observable() const { return _is_observable; }
+
+  void set_observable(bool value) { _is_observable = value; }
+
+  bool is_allocatable() const { return _is_allocatable; }
+
+  void set_allocatable(bool value) { _is_allocatable = value; }
+
+  bool is_data_allocated() const { return _data_allocated; }
+
+  int32_t get_offset() const { return _offset; }
+
+  void set_offset(int32_t offset) { _offset = offset; }
+
 private:
   DataType _element_type;
   Shape _shape;
   AffineQuantization _quantization;
-  std::unique_ptr<uint8_t[]> _data;
+  uint8_t *_data;
   std::string _name;
   bool _data_allocated;
+  // Write of tensor is reported to registered Observers only if this tensor is observable
+  // This is needed for tensors used in kernel implementation, but not present in original model.
+  bool _is_observable = true;
+  // Memory manager is called for tensor only if it is "allocatable".
+  // Kernel configuration could disable allocation of some tensors if they are not needed for
+  // particular operation.
+  bool _is_allocatable = true;
+  // Used by static memory manager.
+  // Stores the offset from the beginning of the allocated memory buffer.
+  int32_t _offset = -1;
 };
 
 } // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644 (file)
index 0000000..9d54127
--- /dev/null
@@ -0,0 +1,68 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LocalResponseNormalization)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(LogSoftmax)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Mean)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pack)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(Pow)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Relu)
+REGISTER_KERNEL(Relu6)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(ReverseV2)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Slice)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(Split)
+REGISTER_KERNEL(SplitV)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(Unpack)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-interpreter/pal/linux/PALArgMax.h
new file mode 100644 (file)
index 0000000..21e6329
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+                             const T2 *axis, const tflite::RuntimeShape &output_shape,
+                             T3 *output_data, const std::greater<T1> cmp)
+{
+  tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/linux/PALBatchToSpaceND.h
new file mode 100644 (file)
index 0000000..3fe2022
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::optimized_ops::BatchToSpaceND(
+    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h
new file mode 100644 (file)
index 0000000..2550dd5
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <memory>
+#include <thread>
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &im2col_shape,
+                        float *im2col_data)
+{
+  if (im2col_data)
+  {
+    tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                                bias_shape, bias_data, output_shape, output_data, im2col_shape,
+                                im2col_data);
+  }
+  else
+    tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                                bias_shape, bias_data, output_shape, output_data,
+                                tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
+                        uint8 *im2col_data)
+{
+  // TODO This should only be done once (although it takes only a few microseconds).
+  //  Also, the user should be able to adjust the number of threads.
+  auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
+  gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, im2col_shape,
+                              im2col_data, gemmlowp_context.get());
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                                  int8 *output_data, const tflite::RuntimeShape &im2col_shape,
+                                  int8 *im2col_data)
+{
+  (void)im2col_shape;
+  (void)im2col_data;
+  // TODO enable optimized version
+  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                filter_shape, filter_data, bias_shape, bias_data,
+                                                output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-interpreter/pal/linux/PALDepthToSpace.h
new file mode 100644 (file)
index 0000000..f9ebfcf
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-interpreter/pal/linux/PALElu.h
new file mode 100644 (file)
index 0000000..cb365ff
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+                       const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-interpreter/pal/linux/PALL2Normalize.h
new file mode 100644 (file)
index 0000000..6c663e2
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+                                   const tflite::RuntimeShape &input_shape, const T *input_data,
+                                   const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+                                         output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-interpreter/pal/linux/PALL2Pool2D.h
new file mode 100644 (file)
index 0000000..aac57f2
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+                          const T *input_data, const tflite::RuntimeShape &output_shape,
+                          T *output_data)
+{
+  tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-interpreter/pal/linux/PALLeakyRelu.h
new file mode 100644 (file)
index 0000000..e8209ba
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+                             const tflite::RuntimeShape &input_shape, const float *input_data,
+                             const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
new file mode 100644 (file)
index 0000000..54f7f09
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+                           const tflite::RuntimeShape &input_shape, const float *input_data,
+                           const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data,
+                                                    output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-interpreter/pal/linux/PALLogSoftmax.h
new file mode 100644 (file)
index 0000000..a32e3ee
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+                                              float beta)
+{
+  tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  // Do nothing for linux
+  (void)params;
+  (void)input_scale;
+  (void)beta;
+}
+
+static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale,
+                              const tflite::RuntimeShape &input_shape, const uint8 *input_data,
+                              const tflite::RuntimeShape &output_shape, uint8 *output_data)
+{
+  tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+                                    output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h
new file mode 100644 (file)
index 0000000..cfaec1b
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                       const float *input1_data, const tflite::RuntimeShape &input2_shape,
+                       const float *input2_data, const tflite::RuntimeShape &output_shape,
+                       float *output_data)
+{
+  tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+                             output_shape, output_data);
+}
+
+static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
+                                      const tflite::RuntimeShape &input1_shape,
+                                      const float *input1_data,
+                                      const tflite::RuntimeShape &input2_shape,
+                                      const float *input2_data,
+                                      const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                                            input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-interpreter/pal/linux/PALNeg.h
new file mode 100644 (file)
index 0000000..797ffee
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+                          const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-interpreter/pal/linux/PALRelu.h
new file mode 100644 (file)
index 0000000..b4c715d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_H
+#define LUCI_INTERPRETER_PAL_RELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
+                        const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+                         const T *input_data, const tflite::RuntimeShape &output_shape,
+                         T *output_data)
+{
+  tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_H
diff --git a/compiler/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-interpreter/pal/linux/PALRelu6.h
new file mode 100644 (file)
index 0000000..bf2f91a
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU6_H
+#define LUCI_INTERPRETER_PAL_RELU6_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data,
+                         const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+                         const T *input_data, const tflite::RuntimeShape &output_shape,
+                         T *output_data)
+{
+  tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU6_H
diff --git a/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-interpreter/pal/linux/PALResizeBilinear.h
new file mode 100644 (file)
index 0000000..7380081
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+               const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+               const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+                                        output_size_shape, output_size_data,
+                                        unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
new file mode 100644 (file)
index 0000000..74d1926
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+                      const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+                                               output_size_shape, output_size_data,
+                                               unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-interpreter/pal/linux/PALSlice.h
new file mode 100644 (file)
index 0000000..640a716
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SLICE_H
+#define LUCI_INTERPRETER_PAL_SLICE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Slice(const tflite::SliceParams &op_params,
+                         const tflite::RuntimeShape &input_shape, const T *input_data,
+                         const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SLICE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-interpreter/pal/linux/PALSoftmax.h
new file mode 100644 (file)
index 0000000..b197e79
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+                                              float beta)
+{
+  tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  // Do nothing for linux
+  (void)params;
+  (void)input_scale;
+  (void)beta;
+}
+
+template <typename In, typename Out>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+                           const tflite::RuntimeShape &input_shape, const In *input_data,
+                           const tflite::RuntimeShape &output_shape, Out *output_data)
+{
+  tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/linux/PALSpaceToBatchND.h
new file mode 100644 (file)
index 0000000..5e8de9b
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+               const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::optimized_ops::SpaceToBatchND(
+    params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/linux/PALSpaceToDepth.h
new file mode 100644 (file)
index 0000000..52d2a5b
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-interpreter/pal/linux/PALSplit.h
new file mode 100644 (file)
index 0000000..4d8da72
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPLIT_H
+#define LUCI_INTERPRETER_PAL_SPLIT_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename Scalar>
+static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape,
+                         const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes,
+                         Scalar *const *output_data)
+{
+  tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPLIT_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-interpreter/pal/linux/PALSub.h
new file mode 100644 (file)
index 0000000..04080d6
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+                       const tflite::RuntimeShape &input1_shape, const T *input1_data,
+                       const tflite::RuntimeShape &input2_shape, const T *input2_data,
+                       const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+                             output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
index da880c6..84349e0 100644 (file)
@@ -1,8 +1,8 @@
 macro(initialize_pal)
-    nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
-    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
-    nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
-    nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
+    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
 
     if (NOT TensorFlowSource_FOUND)
         message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -43,7 +43,12 @@ macro(add_pal_to_target TGT)
     set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
     add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
     set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE "${TensorFlowSource_DIR}")
+    target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}"
+    )
 
     target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
 endmacro()
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644 (file)
index 0000000..771974a
--- /dev/null
@@ -0,0 +1,56 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-interpreter/pal/mcu/PALArgMax.h
new file mode 100644 (file)
index 0000000..21e6329
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+                             const T2 *axis, const tflite::RuntimeShape &output_shape,
+                             T3 *output_data, const std::greater<T1> cmp)
+{
+  tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644 (file)
index 0000000..4dd77ff
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::BatchToSpaceND(
+    unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644 (file)
index 0000000..0a8ae4e
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const float *input_data, const tflite::RuntimeShape &filter_shape,
+                        const float *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const float *bias_data, const tflite::RuntimeShape &output_shape,
+                        float *output_data, const tflite::RuntimeShape &im2col_shape,
+                        float *im2col_data)
+{
+  (void)im2col_shape;
+  (void)im2col_data;
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data,
+                              tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+                        const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+                        const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                        uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
+                        uint8 *im2col_data)
+{
+  (void)im2col_shape;
+  (void)im2col_data;
+  tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data, im2col_shape,
+                              im2col_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+                                  const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+                                  const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+                                  const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+                                  const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+                                  int8 *output_data, const tflite::RuntimeShape &im2col_shape,
+                                  int8 *im2col_data)
+{
+  (void)im2col_shape;
+  (void)im2col_data;
+  tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+                                                filter_shape, filter_data, bias_shape, bias_data,
+                                                output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-interpreter/pal/mcu/PALDepthToSpace.h
new file mode 100644 (file)
index 0000000..8463e57
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-interpreter/pal/mcu/PALElu.h
new file mode 100644 (file)
index 0000000..4089d0a
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+                       const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-interpreter/pal/mcu/PALL2Normalize.h
new file mode 100644 (file)
index 0000000..f84742a
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+                                   const tflite::RuntimeShape &input_shape, const T *input_data,
+                                   const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+                                         output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644 (file)
index 0000000..38a302f
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+                          const T *input_data, const tflite::RuntimeShape &output_shape,
+                          T *output_data)
+{
+  tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-interpreter/pal/mcu/PALLeakyRelu.h
new file mode 100644 (file)
index 0000000..9ccd222
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+                             const tflite::RuntimeShape &input_shape, const float *input_data,
+                             const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644 (file)
index 0000000..2b46b10
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+                       const float *input1_data, const tflite::RuntimeShape &input2_shape,
+                       const float *input2_data, const tflite::RuntimeShape &output_shape,
+                       float *output_data)
+{
+  tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                                            input2_data, output_shape, output_data);
+}
+
+static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
+                                      const tflite::RuntimeShape &input1_shape,
+                                      const float *input1_data,
+                                      const tflite::RuntimeShape &input2_shape,
+                                      const float *input2_data,
+                                      const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                                            input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-interpreter/pal/mcu/PALNeg.h
new file mode 100644 (file)
index 0000000..be5903a
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+                          const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644 (file)
index 0000000..cc9f0fd
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+               const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+               const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+                                        output_size_shape, output_size_data,
+                                        unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644 (file)
index 0000000..f4d5a6e
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+                      const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+                      const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+                      const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+                                               output_size_shape, output_size_data,
+                                               unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644 (file)
index 0000000..9838b54
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+                                              float beta)
+{
+  // Do nothing for mcu
+  (void)data;
+  (void)input_scale;
+  (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  int32 input_beta_multiplier;
+  int input_beta_left_shift;
+  static const int kScaledDiffIntegerBits = 5;
+  tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+                                   &input_beta_multiplier, &input_beta_left_shift);
+
+  params->input_multiplier = input_beta_multiplier;
+  params->input_left_shift = input_beta_left_shift;
+  params->diff_min =
+    -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+                           const tflite::RuntimeShape &input_shape, const T *input_data,
+                           const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: At this moment this operation is not supported on mcu
+  assert(false && "Softmax NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)output_shape;
+  (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644 (file)
index 0000000..fdddaa9
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+               const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+               const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+               const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+               const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::SpaceToBatchND(
+    params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+    unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-interpreter/pal/mcu/PALSpaceToDepth.h
new file mode 100644 (file)
index 0000000..816b7f6
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+                                const tflite::RuntimeShape &unextended_input_shape,
+                                const T *input_data,
+                                const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+  tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+                                      unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-interpreter/pal/mcu/PALSub.h
new file mode 100644 (file)
index 0000000..ea57578
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+                       const tflite::RuntimeShape &input1_shape, const T *input1_data,
+                       const tflite::RuntimeShape &input2_shape, const T *input2_data,
+                       const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+                             output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
index 2307ac7..a479d40 100644 (file)
@@ -1,8 +1,8 @@
 macro(initialize_pal)
-    nnas_find_package(TensorFlowSource EXACT 2.3.0 QUIET)
-    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
-    nnas_find_package(TensorFlowEigenSource EXACT 2.3.0 QUIET)
-    nnas_find_package(TensorFlowRuySource EXACT 2.3.0 QUIET)
+    nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+    nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
 
     if (NOT TensorFlowSource_FOUND)
         message(STATUS "Skipping luci-interpreter: TensorFlow not found")
@@ -30,7 +30,7 @@ endmacro()
 
 macro(add_pal_to_target TGT)
     target_include_directories(${TGT} PRIVATE "${PAL}")
-    target_include_directories(${TGT} SYSTEM PRIVATE
+    target_include_directories(${TGT} PRIVATE
             "${TensorFlowRuySource_DIR}"
             "${TensorFlowGEMMLowpSource_DIR}"
             "${TensorFlowEigenSource_DIR}"
@@ -42,7 +42,12 @@ macro(add_pal_to_target TGT)
     set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
     add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
     set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
-    target_include_directories(luci_interpreter_mcu_pal SYSTEM PRIVATE "${TensorFlowSource_DIR}")
+    target_include_directories(luci_interpreter_mcu_pal PRIVATE
+            "${TensorFlowRuySource_DIR}"
+            "${TensorFlowGEMMLowpSource_DIR}"
+            "${TensorFlowEigenSource_DIR}"
+            "${TensorFlowSource_DIR}"
+    )
 
     target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
     #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.cpp
new file mode 100644 (file)
index 0000000..6ad1f32
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+  int32_t p = lowerLog2(memSize);
+
+  // We assume that the requested size of memory does not exceed 4 GB
+  assert(p < 32);
+  memSize = 1 << p;
+
+  _start_block = reinterpret_cast<Block *>(memory_start);
+  _start_block->size = memSize - sizeof(Block);
+  _start_block->is_free = true;
+  _start_block->self = _start_block;
+  _num_blocks = 0;
+  _size = _start_block->size;
+
+  for (auto &_free_block : _free_blocks)
+    _free_block = nullptr;
+
+  addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  const size_t element_size = getDataTypeSize(tensor.element_type());
+  const int32_t num_elements = tensor.shape().num_elements();
+  auto size = num_elements * element_size;
+  auto footprint = size + sizeof(Block);
+  auto l = (footprint & (footprint - 1)) == 0
+             ? lowerLog2(footprint)
+             : lowerLog2(footprint) + 1; // round footprint up to the next power of two
+
+  while (l < 32 && !_free_blocks[l])
+    l++;
+
+  assert(l < 32);
+
+  Block *tmp;
+  tmp = _free_blocks[l];
+  removeFromBlocks(tmp, l);
+
+  while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+  {
+    divideBlock(tmp, l);
+    l--;
+  }
+
+  tmp->is_free = false;
+  tmp->self = tmp;
+  _num_blocks++;
+
+  auto *data = (uint8_t *)(tmp + 1);
+  tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  auto data = tensor.data<void>();
+  auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+  assert(tmp->self == tmp);
+
+  tmp->is_free = true;
+  addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
+  while (tmp)
+    if (tmp->size == _size)
+      break;
+    else
+      tmp = mergeBlock(tmp);
+
+  _num_blocks--;
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-interpreter/src/BuddyMemoryManager.test.cpp
new file mode 100644 (file)
index 0000000..29fb767
--- /dev/null
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+  auto mem_pool = std::make_unique<uint8_t[]>(200);
+  auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+  Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+  buddy_memory_manager->allocate_memory(first_tensor);
+
+  uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+  first_tensor.writeData(data_1, 8);
+  uint8_t array_1[8];
+  first_tensor.readData(array_1, 8);
+  for (int i = 0; i < 8; i++)
+  {
+    EXPECT_EQ(data_1[i], array_1[i]);
+  }
+
+  Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+  buddy_memory_manager->allocate_memory(second_tensor);
+
+  uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+  second_tensor.writeData(data_2, 10);
+
+  uint8_t array_2[2][5];
+  second_tensor.readData(array_2, 10);
+  for (int i = 0; i < 2; i++)
+  {
+    for (int j = 0; j < 5; j++)
+    {
+      EXPECT_EQ(data_2[i][j], array_2[i][j]);
+    }
+  }
+
+  buddy_memory_manager->release_memory(first_tensor);
+  EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+  buddy_memory_manager->release_memory(second_tensor);
+  EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
index 6f34b61..e371503 100644 (file)
@@ -1,13 +1,19 @@
-include(${LUCI_INTERPRETER_PAL_DIR}/pal.cmake)
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
 
 initialize_pal()
 
 if (NOT PAL_INITIALIZED)
+  message("PAL Failed to initialize, skip luci-interpreter")
   return()
 endif()
 
 message(STATUS "LUCI INTERPRETER BEGIN")
 
+set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+
 add_subdirectory(core)
 message(STATUS "LUCI INTERPRETER CORE")
 add_subdirectory(kernels)
@@ -19,15 +25,34 @@ message(STATUS "LUCI INTERPTER INITALIZED")
 
 set(SOURCES
     "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
-    Interpreter.cpp)
-
-add_library(luci_interpreter SHARED ${SOURCES})
-target_include_directories(luci_interpreter PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(luci_interpreter PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter
-    PUBLIC luci_lang luci_interpreter_loader luci_interpreter_core
+    Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+if (NOT LUCI_INTERPRETER_STATIC)
+  add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
+else ()
+  add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+endif ()
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+    PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
     PRIVATE nncc_common)
 
-install(TARGETS luci_interpreter DESTINATION lib)
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+  return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
index b57b691..1b8792a 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "luci_interpreter/Interpreter.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
 
 #include "loader/ModuleLoader.h"
 
@@ -69,12 +70,25 @@ private:
 
 } // namespace
 
-Interpreter::Interpreter(const luci::Module *module)
+Interpreter::Interpreter(const luci::Module *module,
+                         luci_interpreter::IMemoryManager *memory_manager)
 {
   _runtime_to_ir = std::make_unique<RuntimeToIR>();
   _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
   _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
-  ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor);
+
+  if (memory_manager == nullptr)
+  {
+    _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+    _memory_manager = _default_memory_manager.get();
+  }
+  else
+  {
+    _memory_manager = memory_manager;
+  }
+
+  ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+                      _memory_manager);
   loader.load();
 }
 
diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp
new file mode 100644 (file)
index 0000000..230e398
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  if (!tensor.is_allocatable())
+  {
+    return;
+  }
+  if (tensor.is_data_allocated())
+  {
+    release_memory(tensor);
+  }
+  const auto element_size = getDataTypeSize(tensor.element_type());
+  const auto num_elements = tensor.shape().num_elements();
+
+  auto *data = new uint8_t[num_elements * element_size];
+  tensor.set_data_buffer(data);
+}
+
+void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  if (!tensor.is_data_allocated())
+  {
+    tensor.set_data_buffer(nullptr);
+    return;
+  }
+  auto data = tensor.data<uint8_t>();
+  delete[] data;
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-interpreter/src/StaticMemoryManager.cpp
new file mode 100644 (file)
index 0000000..73a8199
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  if (!tensor.is_allocatable())
+  {
+    return;
+  }
+  int32_t offset = tensor.get_offset();
+  assert(offset >= 0);
+  auto tensor_ptr = _buffer_ptr + offset;
+  tensor.set_data_buffer(tensor_ptr);
+}
+
+void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-interpreter/src/TestMemoryManager.cpp
new file mode 100644 (file)
index 0000000..3beeee5
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+  if (!tensor.is_allocatable())
+  {
+    return;
+  }
+  if (tensor.is_data_allocated())
+  {
+    release_memory(tensor);
+  }
+  const auto element_size = getDataTypeSize(tensor.element_type());
+  const auto num_elements = tensor.shape().num_elements();
+
+  auto *data = new uint8_t[num_elements * element_size];
+  allocations.push_back(data);
+  tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+  tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
index e576dbd..4430cba 100644 (file)
@@ -9,9 +9,9 @@ set(SOURCES
     RuntimeModule.h
     Tensor.cpp)
 
-add_library(luci_interpreter_core STATIC ${SOURCES})
-set_target_properties(luci_interpreter_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
-target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter_core PUBLIC luci_lang)
-target_link_libraries(luci_interpreter_core PRIVATE nncc_common)
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
index 5cdb2e3..a7c4a42 100644 (file)
@@ -36,8 +36,8 @@ protected:
 public:
   virtual ~Kernel() = default;
 
-  std::vector<const Tensor *> getInputTensors() const { return _inputs; }
-  std::vector<Tensor *> getOutputTensors() const { return _outputs; }
+  const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
+  const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
 
   // Configures the kernel.
   // This function is currently called once for each kernel during interpreter construction,
index fb0ad30..c2f8d2e 100644 (file)
@@ -29,8 +29,10 @@ class RuntimeGraph::TensorAllocPlan
   std::vector<std::vector<Tensor *>> _alloc_plan;
   std::vector<std::vector<Tensor *>> _dealloc_plan;
   bool _valid = false;
+  IMemoryManager *_memory_manager;
 
 public:
+  explicit TensorAllocPlan(IMemoryManager *memory_manager);
   void invalidate() { _valid = false; }
   bool isValid() const { return _valid; }
   void build(const RuntimeGraph &graph);
@@ -38,6 +40,11 @@ public:
   void deallocate(size_t kernel_index) const;
 };
 
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+  : _memory_manager(memory_manager)
+{
+}
+
 void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
 {
   invalidate();
@@ -80,7 +87,7 @@ void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
   assert(_valid && kernel_index < _alloc_plan.size());
   for (Tensor *tensor : _alloc_plan[kernel_index])
   {
-    tensor->allocate();
+    _memory_manager->allocate_memory(*tensor);
   }
 }
 
@@ -89,16 +96,24 @@ void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
   assert(_valid && kernel_index < _dealloc_plan.size());
   for (Tensor *tensor : _dealloc_plan[kernel_index])
   {
-    tensor->deallocate();
+    _memory_manager->release_memory(*tensor);
   }
 }
 
-RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module)
-  : _owning_module(owning_module), _tensor_alloc_plan(std::make_unique<TensorAllocPlan>())
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+  : _owning_module(owning_module), _memory_manager(memory_manager),
+    _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
 {
 }
 
-RuntimeGraph::~RuntimeGraph() {}
+RuntimeGraph::~RuntimeGraph()
+{
+  for (auto &tensor : _tensors)
+  {
+    if (tensor->is_data_allocated())
+      _memory_manager->release_memory(*tensor);
+  }
+}
 
 Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
 {
@@ -121,6 +136,11 @@ void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
   _output_tensors = output_tensors;
 }
 
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+  _memory_manager->allocate_memory(*tensor);
+}
+
 void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
 {
   assert(kernel != nullptr);
@@ -140,7 +160,8 @@ void RuntimeGraph::execute() const
   {
     for (const Tensor *input_tensor : getInputTensors())
     {
-      event_notifier->postTensorWrite(input_tensor);
+      if (input_tensor->is_observable())
+        event_notifier->postTensorWrite(input_tensor);
     }
   }
 
@@ -155,11 +176,10 @@ void RuntimeGraph::execute() const
     // TODO The `configure` method should only be called if the outputs of an operator need to be
     //  resized.
     kernel->configure();
-// TODO decide where to allocate memory, and uncomment/remove this if
-#if 0
-    _tensor_alloc_plan->allocate(
-        index); // Preallocate outputs in advance instead of relying on automatic allocation
-#endif
+
+    // Preallocate outputs in advance instead of relying on automatic allocation
+    _tensor_alloc_plan->allocate(index);
+
     kernel->execute();
 
     if (event_notifier != nullptr)
@@ -169,7 +189,7 @@ void RuntimeGraph::execute() const
 
     for (const Tensor *tensor : kernel->getOutputTensors())
     {
-      if (event_notifier != nullptr)
+      if (event_notifier != nullptr && tensor->is_observable())
       {
         event_notifier->postTensorWrite(tensor);
       }
index 5f73202..8184e24 100644 (file)
@@ -18,6 +18,7 @@
 #define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
 
 #include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
 #include "core/Kernel.h"
 
 #include <memory>
@@ -35,7 +36,7 @@ private:
   friend class TensorAllocPlan;
 
 public:
-  explicit RuntimeGraph(RuntimeModule *owning_module);
+  explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
   ~RuntimeGraph();
 
   Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
@@ -43,6 +44,8 @@ public:
   void setInputTensors(const std::vector<Tensor *> &input_tensors);
   void setOutputTensors(const std::vector<Tensor *> &output_tensors);
 
+  void configureAllocations(Tensor *tensor);
+
   const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
   const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
 
@@ -51,6 +54,7 @@ public:
   void execute() const;
 
 private:
+  IMemoryManager *_memory_manager;
   RuntimeModule *_owning_module;
   std::vector<std::unique_ptr<Tensor>> _tensors;
   std::vector<Tensor *> _input_tensors;
index dccc3a1..78873b0 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "core/RuntimeGraph.h"
 #include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
 
 #include <memory>
 #include <vector>
@@ -33,9 +34,9 @@ public:
 
   EventNotifier *getEventNotifier() const { return _event_notifier; }
 
-  RuntimeGraph *addGraph()
+  RuntimeGraph *addGraph(IMemoryManager *memory_manager)
   {
-    _graphs.push_back(std::make_unique<RuntimeGraph>(this));
+    _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
     return _graphs.back().get();
   }
 
index a9e7be0..3c3c5ff 100644 (file)
@@ -29,21 +29,6 @@ Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantizati
 {
 }
 
-void Tensor::allocate()
-{
-  deallocate();
-  const size_t element_size = getDataTypeSize(_element_type);
-  const int32_t num_elements = _shape.num_elements();
-  _data = std::make_unique<uint8_t[]>(num_elements * element_size);
-  _data_allocated = true;
-}
-
-void Tensor::deallocate()
-{
-  _data_allocated = false;
-  _data.reset();
-}
-
 void Tensor::readData(void *data_ptr, size_t data_size) const
 {
   const size_t element_size = getDataTypeSize(element_type());
@@ -68,10 +53,6 @@ void Tensor::writeData(const void *data_ptr, size_t data_size)
   std::memcpy(data<void>(), data_ptr, data_size);
 }
 
-void Tensor::resize(const Shape &new_shape)
-{
-  deallocate();
-  _shape = new_shape;
-}
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
 
 } // namespace luci_interpreter
index 5ad9beb..847b656 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Add.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,6 +28,14 @@ namespace
 
 using namespace testing;
 
+class AddTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
 // for quantized Add, the error shouldn't exceed step
 float GetTolerance(float min, float max)
 {
@@ -34,7 +43,7 @@ float GetTolerance(float min, float max)
   return kQuantizedStep;
 }
 
-TEST(AddTest, Uint8)
+TEST_F(AddTest, Uint8)
 {
   std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
   std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f,  0.5f, 0.8f, -1.1f,
@@ -57,10 +66,10 @@ TEST(AddTest, Uint8)
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
   for (int i = 0; i < output_data.size(); i++)
   {
-    Tensor input1_tensor =
-      makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
-    Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
-                                                         quant_param.second, test_data);
+    Tensor input1_tensor = makeInputTensor<DataType::U8>(
+      base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::U8>(
+      test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
     Tensor output_tensor =
       makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
 
@@ -69,6 +78,7 @@ TEST(AddTest, Uint8)
 
     Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -78,10 +88,10 @@ TEST(AddTest, Uint8)
   // Re-run with exchanged inputs.
   for (int i = 0; i < output_data.size(); i++)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
-                                                         quant_param.second, test_data);
-    Tensor input2_tensor =
-      makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+    Tensor input1_tensor = makeInputTensor<DataType::U8>(
+      test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::U8>(
+      base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
     Tensor output_tensor =
       makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
 
@@ -90,6 +100,7 @@ TEST(AddTest, Uint8)
 
     Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -98,7 +109,7 @@ TEST(AddTest, Uint8)
   }
 }
 
-TEST(AddTest, Float)
+TEST_F(AddTest, Float)
 {
   Shape base_shape = {2, 3, 1, 2};
   std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -116,8 +127,10 @@ TEST(AddTest, Float)
   std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
-    Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+    Tensor input1_tensor =
+      makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+    Tensor input2_tensor =
+      makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
     AddParams params{};
@@ -125,6 +138,7 @@ TEST(AddTest, Float)
 
     Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -133,8 +147,10 @@ TEST(AddTest, Float)
   // Re-run with exchanged inputs.
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
-    Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+    Tensor input1_tensor =
+      makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+    Tensor input2_tensor =
+      makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
     AddParams params{};
@@ -142,6 +158,7 @@ TEST(AddTest, Float)
 
     Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -149,7 +166,7 @@ TEST(AddTest, Float)
   }
 }
 
-TEST(AddTest, SInt16)
+TEST_F(AddTest, SInt16)
 {
   Shape base_shape = {2, 3, 1, 2};
   std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -171,9 +188,10 @@ TEST(AddTest, SInt16)
 
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
-    Tensor input2_tensor =
-      makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
     const float tolerance = output_tensor.scale();
 
@@ -182,6 +200,7 @@ TEST(AddTest, SInt16)
 
     Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorShape(output_tensor),
@@ -193,9 +212,10 @@ TEST(AddTest, SInt16)
   // Re-run with exchanged inputs and different scales.
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor =
-      makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
-    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
     const float tolerance = output_tensor.scale();
 
@@ -204,6 +224,7 @@ TEST(AddTest, SInt16)
 
     Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorShape(output_tensor),
@@ -214,10 +235,10 @@ TEST(AddTest, SInt16)
   }
 }
 
-TEST(AddTest, Input_Output_Type_NEG)
+TEST_F(AddTest, Input_Output_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   AddParams params{};
@@ -227,10 +248,10 @@ TEST(AddTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(AddTest, Invalid_Input_Type_NEG)
+TEST_F(AddTest, Invalid_Input_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
-  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   AddParams params{};
@@ -238,6 +259,7 @@ TEST(AddTest, Invalid_Input_Type_NEG)
 
   Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 2437d57..6561a17 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "kernels/ArgMax.h"
 #include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALArgMax.h"
 
 namespace luci_interpreter
 {
@@ -60,10 +60,10 @@ void ArgMax::configure()
 void ArgMax::execute() const
 {
 
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type)                                        \
-  tflite::optimized_ops::ArgMinMax(                                                               \
-    getTensorShape(input()), getTensorData<data_type>(input()), getTensorData<axis_type>(axis()), \
-    getTensorShape(output()), getTensorData<output_type>(output()), std::greater<data_type>())
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type)                                    \
+  luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
+                                  getTensorData<axis_type>(axis()), getTensorShape(output()), \
+                                  getTensorData<output_type>(output()), std::greater<data_type>())
   if (axis()->element_type() == DataType::S32)
   {
     switch (_params.output_type)
index 3362edb..119c69c 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/ArgMax.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -32,15 +33,19 @@ void Check(std::initializer_list<int32_t> input_shape,
            std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
            std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T1>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
-  Tensor dimension_tensor = makeInputTensor<DataType::S32>(dimension_shape, dimension_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor dimension_tensor =
+    makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(getElementType<T2>());
 
   ArgMaxParams params{};
   params.output_type = getElementType<T2>();
   ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -94,17 +99,21 @@ TYPED_TEST(ArgMaxTest, MultiDimensions)
 
 TEST(ArgMaxTest, UnsupportedType_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
-                                                                           1, 2, 7, 8, //
-                                                                           1, 9, 7, 3, //
-                                                                         });
-  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+                                                           {
+                                                             1, 2, 7, 8, //
+                                                             1, 9, 7, 3, //
+                                                           },
+                                                           memory_manager.get());
+  Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   ArgMaxParams params{};
   params.output_type = DataType::U8;
   ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 65ea4c0..5545fb4 100644 (file)
@@ -70,6 +70,11 @@ void AveragePool2D::configure()
     LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
     LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
   }
+  else if (input()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+  }
   output()->resize({batches, output_height, output_width, depth});
 }
 
@@ -86,6 +91,9 @@ void AveragePool2D::execute() const
     case DataType::S16:
       evalSInt16();
       break;
+    case DataType::S8:
+      evalSInt8();
+      break;
     default:
       throw std::runtime_error("Unsupported type.");
   }
@@ -132,6 +140,26 @@ void AveragePool2D::evalQuantized() const
                                      getTensorData<uint8_t>(output()));
 }
 
+void AveragePool2D::evalSInt8() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+  tflite::PoolParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.filter_height = _params.filter_height;
+  params.filter_width = _params.filter_width;
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  tflite::reference_integer_ops::AveragePool(
+    params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
+    getTensorData<int8_t>(output()));
+}
+
 void AveragePool2D::evalSInt16() const
 {
   int32_t activation_min{};
index 282a587..b98367f 100644 (file)
@@ -40,6 +40,7 @@ private:
   void evalFloat() const;
   void evalQuantized() const;
   void evalSInt16() const;
+  void evalSInt8() const;
 
 private:
   int32_t _padding_height{};
index 4d7dab8..7ed4211 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/AveragePool2D.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,7 +27,15 @@ namespace
 
 using namespace testing;
 
-TEST(AveragePool2DTest, Float)
+class AveragePool2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(AveragePool2DTest, Float)
 {
   Shape input_shape{1, 3, 5, 1};
   std::vector<float> input_data{
@@ -34,7 +43,8 @@ TEST(AveragePool2DTest, Float)
     1,  2,  3,  4,  5,  //
     6,  7,  8,  9,  10, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -47,6 +57,7 @@ TEST(AveragePool2DTest, Float)
 
   AveragePool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -57,15 +68,15 @@ TEST(AveragePool2DTest, Float)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
 }
 
-TEST(AveragePool2DTest, Uint8_0)
+TEST_F(AveragePool2DTest, Uint8_0)
 {
   std::vector<float> input_data{
     0,  -6, 12, 4, //
     -3, -2, 10, 7, //
   };
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   Pool2DParams params{};
@@ -78,13 +89,14 @@ TEST(AveragePool2DTest, Uint8_0)
 
   AveragePool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
 }
 
-TEST(AveragePool2DTest, Uint8_1)
+TEST_F(AveragePool2DTest, Uint8_1)
 {
   std::vector<float> input_data{
     0, 6, 12, 4, //
@@ -92,8 +104,8 @@ TEST(AveragePool2DTest, Uint8_1)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   Pool2DParams params{};
@@ -106,13 +118,14 @@ TEST(AveragePool2DTest, Uint8_1)
 
   AveragePool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
 }
 
-TEST(AveragePool2DTest, SInt16)
+TEST_F(AveragePool2DTest, SInt16)
 {
   Shape input_shape{1, 3, 5, 1};
   std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
@@ -125,7 +138,8 @@ TEST(AveragePool2DTest, SInt16)
     0, 1.5, //
     4.5, 6, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
 
   Pool2DParams params{};
@@ -138,13 +152,47 @@ TEST(AveragePool2DTest, SInt16)
 
   AveragePool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
+TEST_F(AveragePool2DTest, SInt8)
+{
+  Shape input_shape{1, 4, 5, 1};
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+  std::vector<float> input_data{-7, -3, 0,  2, -5, 12, -15, 3,  10, 5,
+                                7,  -6, -1, 9, -2, 0,  -5,  11, -1, -7};
+  std::vector<float> ref_output_data{
+    0, 2.5, //
+    1, 1.5, //
+  };
+
+  std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
+  Tensor input_tensor = makeInputTensor<DataType::S8>(
+    input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+  Pool2DParams params{};
+  params.padding = Padding::VALID;
+  params.filter_height = 2;
+  params.filter_width = 3;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.activation = Activation::RELU6;
+
+  AveragePool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
 {
   Shape input_shape{1, 3, 5};
   std::vector<float> input_data{
@@ -152,7 +200,8 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
     1,  2,  3,  4,  5,  //
     6,  7,  8,  9,  10, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -167,7 +216,7 @@ TEST(AveragePool2DTest, Invalid_Input_Shape_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(AveragePool2DTest, In_Out_Type_NEG)
+TEST_F(AveragePool2DTest, In_Out_Type_NEG)
 {
   Shape input_shape{1, 3, 5, 1};
   std::vector<float> input_data{
@@ -175,7 +224,8 @@ TEST(AveragePool2DTest, In_Out_Type_NEG)
     1,  2,  3,  4,  5,  //
     6,  7,  8,  9,  10, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Pool2DParams params{};
@@ -190,7 +240,7 @@ TEST(AveragePool2DTest, In_Out_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(AveragePool2DTest, Quant_Param_NEG)
+TEST_F(AveragePool2DTest, Quant_Param_NEG)
 {
   std::vector<float> input_data{
     0,  -6, 12, 4, //
@@ -199,8 +249,8 @@ TEST(AveragePool2DTest, Quant_Param_NEG)
 
   std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
   std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
-  Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param1.first,
-                                                      quant_param1.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
 
   Pool2DParams params{};
index 591fcc0..bd315ff 100644 (file)
@@ -18,7 +18,7 @@
 #include "kernels/BatchToSpaceND.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALBatchToSpaceND.h"
 
 #include <stdexcept>
 
@@ -83,13 +83,13 @@ void BatchToSpaceND::execute() const
   switch (input()->element_type())
   {
     case DataType::FLOAT32:
-      tflite::optimized_ops::BatchToSpaceND(
+      luci_interpreter_pal::BatchToSpaceND(
         getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
         getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
         getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
       break;
     case DataType::U8:
-      tflite::optimized_ops::BatchToSpaceND(
+      luci_interpreter_pal::BatchToSpaceND(
         getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
         getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
         getTensorData<int32_t>(crops()), getTensorShape(output()),
index a29981d..f3a3449 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/BatchToSpaceND.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -33,14 +34,19 @@ void Check(std::initializer_list<int32_t> input_shape,
            std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
            std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
-  Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
-  Tensor crops_tensor = makeInputTensor<DataType::S32>(crops_shape, crops_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor crops_tensor =
+    makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -65,10 +71,11 @@ TYPED_TEST(BatchToSpaceNDTest, Simple)
 
 TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
-  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
-  Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
@@ -77,10 +84,11 @@ TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
 
 TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
-    {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
-  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
-  Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0});
+    {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
index 9801e11..1b7d0f6 100644 (file)
-find_package(Threads REQUIRED)
-
 set(SOURCES
-    Add.h
-    Add.cpp
-    ArgMax.h
-    ArgMax.cpp
-    AveragePool2D.h
-    AveragePool2D.cpp
-    BatchToSpaceND.h
-    BatchToSpaceND.cpp
-    Cast.h
-    Cast.cpp
-    Concatenation.h
-    Concatenation.cpp
-    Conv2D.h
-    Conv2D.cpp
-    DepthToSpace.h
-    DepthToSpace.cpp
-    DepthwiseConv2D.h
-    DepthwiseConv2D.cpp
-    Div.h
-    Div.cpp
-    Elu.h
-    Elu.cpp
-    Exp.h
-    Exp.cpp
-    Floor.h
-    Floor.cpp
-    FloorDiv.h
-    FloorDiv.cpp
-    Equal.h
-    Equal.cpp
-    FullyConnected.h
-    FullyConnected.cpp
-    Greater.h
-    Greater.cpp
-    GreaterEqual.h
-    GreaterEqual.cpp
-    If.h
-    If.cpp
-    InstanceNorm.h
-    InstanceNorm.cpp
-    L2Normalize.h
-    L2Normalize.cpp
-    L2Pool2D.h
-    L2Pool2D.cpp
-    LeakyRelu.h
-    LeakyRelu.cpp
-    Less.h
-    Less.cpp
-    LessEqual.h
-    LessEqual.cpp
-    LocalResponseNormalization.h
-    LocalResponseNormalization.cpp
-    LogicalAnd.h
-    LogicalAnd.cpp
-    LogicalNot.h
-    LogicalNot.cpp
-    LogicalOr.h
-    LogicalOr.cpp
-    Logistic.h
-    Logistic.cpp
-    LogSoftmax.h
-    LogSoftmax.cpp
-    Maximum.h
-    Maximum.cpp
-    MaxPool2D.h
-    MaxPool2D.cpp
-    Mean.h
-    Mean.cpp
-    Minimum.h
-    Minimum.cpp
-    MirrorPad.h
-    MirrorPad.cpp
-    Mul.h
-    Mul.cpp
-    Neg.h
-    Neg.cpp
-    NotEqual.h
-    NotEqual.cpp
-    Pack.h
-    Pack.cpp
-    Pad.h
-    Pad.cpp
-    PadV2.h
-    PadV2.cpp
-    Pow.h
-    Pow.cpp
-    PRelu.h
-    PRelu.cpp
-    Relu.h
-    Relu.cpp
-    Relu6.h
-    Relu6.cpp
-    Reshape.h
-    Reshape.cpp
-    ResizeBilinear.h
-    ResizeBilinear.cpp
-    ResizeNearestNeighbor.h
-    ResizeNearestNeighbor.cpp
-    ReverseV2.h
-    ReverseV2.cpp
-    Rsqrt.h
-    Rsqrt.cpp
-    Slice.h
-    Slice.cpp
-    Softmax.h
-    Softmax.cpp
-    SpaceToBatchND.h
-    SpaceToBatchND.cpp
-    SpaceToDepth.h
-    SpaceToDepth.cpp
-    Split.h
-    Split.cpp
-    StridedSlice.h
-    StridedSlice.cpp
-    Sqrt.h
-    Sqrt.cpp
-    Square.h
-    Square.cpp
-    SquaredDifference.h
-    SquaredDifference.cpp
-    Squeeze.h
-    Squeeze.cpp
-    Sub.h
-    Sub.cpp
-    Tanh.h
-    Tanh.cpp
-    Transpose.h
-    Transpose.cpp
-    TransposeConv.h
-    TransposeConv.cpp
-    Unpack.h
-    Unpack.cpp
-    While.h
-    While.cpp)
+        BinaryOpCommon.h
+        Utils.h
+        Utils.cpp
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+        ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+        "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+        ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+macro(REGISTER_KERNEL NODE)
+  list(APPEND SOURCES "${NODE}.h")
+  list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
 
-list(APPEND SOURCES
-    BinaryOpCommon.h
-    Utils.h
-    Utils.cpp
-    ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
 
-add_library(luci_interpreter_kernels STATIC ${SOURCES})
-set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
-target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
-    "${TensorFlowRuySource_DIR}"
-    "${TensorFlowGEMMLowpSource_DIR}"
-    "${TensorFlowEigenSource_DIR}"
-    "${TensorFlowSource_DIR}")
-target_link_libraries(luci_interpreter_kernels
-    PUBLIC luci_interpreter_core
-    PRIVATE nncc_common Threads::Threads)
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
 
 if(NOT ENABLE_TEST)
   return()
@@ -160,75 +29,13 @@ endif(NOT ENABLE_TEST)
 
 nnas_find_package(GTest REQUIRED)
 
-set(TEST_SOURCES
-    Add.test.cpp
-    ArgMax.test.cpp
-    AveragePool2D.test.cpp
-    BatchToSpaceND.test.cpp
-    Cast.test.cpp
-    Concatenation.test.cpp
-    Conv2D.test.cpp
-    DepthToSpace.test.cpp
-    DepthwiseConv2D.test.cpp
-    Div.test.cpp
-    Elu.test.cpp
-    Exp.test.cpp
-    Floor.test.cpp
-    FloorDiv.test.cpp
-    Equal.test.cpp
-    FullyConnected.test.cpp
-    Greater.test.cpp
-    GreaterEqual.test.cpp
-    If.test.cpp
-    InstanceNorm.test.cpp
-    L2Normalize.test.cpp
-    L2Pool2D.test.cpp
-    LeakyRelu.test.cpp
-    Less.test.cpp
-    LessEqual.test.cpp
-    LocalResponseNormalization.test.cpp
-    LogicalAnd.test.cpp
-    LogicalNot.test.cpp
-    LogicalOr.test.cpp
-    Logistic.test.cpp
-    LogSoftmax.test.cpp
-    Maximum.test.cpp
-    MaxPool2D.test.cpp
-    Mean.test.cpp
-    Minimum.test.cpp
-    Mul.test.cpp
-    Neg.test.cpp
-    NotEqual.test.cpp
-    Pack.test.cpp
-    Pad.test.cpp
-    PadV2.test.cpp
-    Pow.test.cpp
-    PRelu.test.cpp
-    Relu.test.cpp
-    Relu6.test.cpp
-    Reshape.test.cpp
-    ResizeBilinear.test.cpp
-    ResizeNearestNeighbor.test.cpp
-    ReverseV2.test.cpp
-    Rsqrt.test.cpp
-    Slice.test.cpp
-    Softmax.test.cpp
-    SpaceToBatchND.test.cpp
-    SpaceToDepth.test.cpp
-    Split.test.cpp
-    StridedSlice.test.cpp
-    Sqrt.test.cpp
-    Square.test.cpp
-    SquaredDifference.test.cpp
-    Squeeze.test.cpp
-    Sub.test.cpp
-    Tanh.test.cpp
-    Transpose.test.cpp
-    TransposeConv.test.cpp
-    Unpack.test.cpp
-    While.test.cpp)
+macro(REGISTER_KERNEL NODE)
+  list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
 
 list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
 
-GTest_AddTest(luci_interpreter_kernels_test ${TEST_SOURCES})
-target_link_libraries(luci_interpreter_kernels_test luci_interpreter_kernels)
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
index 4294462..7312605 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Cast.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -30,59 +31,209 @@ template <typename T1, typename T2>
 void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
            std::initializer_list<T2> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType input_type = getElementType<T1>();
   constexpr DataType output_type = getElementType<T2>();
 
-  Tensor input_tensor = makeInputTensor<input_type>(shape, input_data);
+  Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(output_type);
 
   Cast kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), shape);
 }
 
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+                 std::initializer_list<T> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType input_type = loco::DataType::BOOL;
+  constexpr DataType output_type = getElementType<T>();
+  std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+  for (auto elem : input_data)
+  {
+    input_data_converted.push_back(elem);
+  }
+
+  Tensor input_tensor =
+    makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(output_type);
+
+  Cast kernel(&input_tensor, &output_tensor);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
 template <typename T> class CastTest : public ::testing::Test
 {
 };
 
-using DataTypes = ::testing::Types<uint8_t, int32_t, int64_t>;
-TYPED_TEST_CASE(CastTest, DataTypes);
+using IntDataTypes =
+  ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_CASE(CastTest, IntDataTypes);
 
 TYPED_TEST(CastTest, FloatToInt)
 {
   Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
                           /*input_data=*/
                           {
-                            1.43f, 9.99f, 7.0f, 3.12f, //
+                            1.0f, 9.0f, 7.0f, 3.0f, //
                           },
                           /*output_data=*/
                           {
                             1, 9, 7, 3, //
                           });
-  Check<TypeParam, TypeParam>(/*shape=*/{1, 1, 1, 4},
-                              /*input_data=*/
-                              {
-                                1, 9, 7, 3, //
-                              },
-                              /*output_data=*/
-                              {
-                                1, 9, 7, 3, //
-                              });
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToFloat)
+{
+  Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+                          /*input_data=*/
+                          {
+                            1, 9, 7, 3, //
+                          },
+                          /*output_data=*/
+                          {
+                            1.0f, 9.0f, 7.0f, 3.0f, //
+                          });
+  SUCCEED();
+}
+
+template <typename T1, typename T2> void check_int()
+{
+  Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+                /*input_data=*/
+                {
+                  1, 9, 7, 3, //
+                },
+                /*output_data=*/
+                {
+                  1, 9, 7, 3, //
+                });
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToInt)
+{
+  check_int<TypeParam, uint8_t>();
+  check_int<TypeParam, uint16_t>();
+  check_int<TypeParam, uint32_t>();
+  check_int<TypeParam, uint64_t>();
+  check_int<TypeParam, int8_t>();
+  check_int<TypeParam, int16_t>();
+  check_int<TypeParam, int32_t>();
+  check_int<TypeParam, int64_t>();
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToBool)
+{
+  Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+                         /*input_data=*/
+                         {
+                           1, 0, 7, 0, //
+                         },
+                         /*output_data=*/
+                         {
+                           true, false, true, false, //
+                         });
+  SUCCEED();
+}
+
+TYPED_TEST(CastTest, BoolToInt)
+{
+  CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+                         /*input_data=*/
+                         {
+                           true, false, false, true, //
+                         },
+                         /*output_data=*/
+                         {
+                           1, 0, 0, 1, //
+                         });
+  SUCCEED();
+}
+
+TEST(CastTest, FloatToBool)
+{
+  Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+                     /*input_data=*/
+                     {
+                       1.0f, 0.0f, 7.0f, 0.0f, //
+                     },
+                     /*output_data=*/
+                     {
+                       true, false, true, false, //
+                     });
+  SUCCEED();
+}
+
+TEST(CastTest, BoolToFloat)
+{
+  CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+                     /*input_data=*/
+                     {
+                       true, false, false, true, //
+                     },
+                     /*output_data=*/
+                     {
+                       1.0f, 0.0f, 0.0f, 1.0f, //
+                     });
+  SUCCEED();
+}
+
+TEST(CastTest, FloatToFloat)
+{
+  Check<float, float>(/*shape=*/{1, 1, 1, 4},
+                      /*input_data=*/
+                      {
+                        1.0f, 0.0f, 7.0f, 0.0f, //
+                      },
+                      /*output_data=*/
+                      {
+                        1.0f, 0.0f, 7.0f, 0.0f, //
+                      });
+  SUCCEED();
+}
+
+TEST(CastTest, BoolToBool)
+{
+  CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+                    /*input_data=*/
+                    {
+                      true, true, false, false, //
+                    },
+                    /*output_data=*/
+                    {
+                      true, true, false, false, //
+                    });
+  SUCCEED();
 }
 
 TEST(CastTest, UnsupportedType_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4}, {
-                                                                           1, 2, 7, 8, //
-                                                                           1, 9, 7, 3, //
-                                                                         });
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+                                                           {
+                                                             1, 2, 7, 8, //
+                                                             1, 9, 7, 3, //
+                                                           },
+                                                           memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::Unknown);
 
   Cast kernel(&input_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
+  SUCCEED();
 }
 
 } // namespace
index e3376c1..7cfdf34 100644 (file)
@@ -18,7 +18,7 @@
 #include "kernels/Concatenation.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
 
 #include <stdexcept>
 
index ee9b7d0..e4b5061 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Concatenation.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,12 +27,22 @@ namespace
 
 using namespace testing;
 
-TEST(ConcatenationTest, Float)
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ConcatenationTest, Float)
 {
   std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
 
@@ -42,6 +53,10 @@ TEST(ConcatenationTest, Float)
 
     Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
     kernel.configure();
+    for (auto t : kernel.getOutputTensors())
+    {
+      _memory_manager->allocate_memory(*t);
+    }
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -53,6 +68,7 @@ TEST(ConcatenationTest, Float)
 
     Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -64,6 +80,7 @@ TEST(ConcatenationTest, Float)
 
     Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -75,6 +92,7 @@ TEST(ConcatenationTest, Float)
 
     Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -82,7 +100,7 @@ TEST(ConcatenationTest, Float)
   }
 }
 
-TEST(ConcatenationTest, Input_Number_Check_NEG)
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
 {
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
@@ -94,12 +112,14 @@ TEST(ConcatenationTest, Input_Number_Check_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(ConcatenationTest, Invalid_Axis_NEG)
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
 {
   std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
 
@@ -110,12 +130,13 @@ TEST(ConcatenationTest, Invalid_Axis_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(ConcatenationTest, Mismatching_Input_Type_NEG)
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
 {
   std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
 
@@ -126,12 +147,14 @@ TEST(ConcatenationTest, Mismatching_Input_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
 {
   std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
 
@@ -142,12 +165,14 @@ TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
 {
   std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
 
@@ -158,12 +183,12 @@ TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(ConcatenationTest, Unsupported_Configure_Type_NEG)
+TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
 {
   std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data);
+  Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S8);
   ConcatenationParams params{};
 
@@ -175,12 +200,14 @@ TEST(ConcatenationTest, Unsupported_Configure_Type_NEG)
 }
 
 // TODO: Remove this test when concat w/ fused_activation is supported
-TEST(ConcatenationTest, With_Fused_Activation_NEG)
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
 {
   std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
   std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   ConcatenationParams params{};
 
index 56ca96a..fb5e063 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include "PALConv2d.h"
 
 #include <stdexcept>
 #include <thread>
@@ -30,8 +30,8 @@ namespace kernels
 {
 
 Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-               const Conv2DParams &params)
-  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
+               Tensor *im2col, const Conv2DParams &params)
+  : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params)
 {
 }
 
@@ -45,7 +45,7 @@ void Conv2D::configure()
   // (3) | uint8 uint8  int32 uint8  | quantized
   // (4) | int8  int8   int32 int8   | quantized per channel
   //
-  // We only support (1) and (3) for now, and additionally the following:
+  // We only support (1), (3) and (4) for now, and additionally the following:
   //     | input filter bias  output |
   // ----+---------------------------+
   // (5) | int16 int16  int64 int16  |
@@ -58,6 +58,17 @@ void Conv2D::configure()
   {
     LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
   }
+  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+    LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+                           static_cast<size_t>(filter()->shape().dim(0)));
+    for (auto zerop : filter()->zero_points())
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+  }
   else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
   {
     LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
@@ -103,23 +114,20 @@ void Conv2D::configure()
     _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
   const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
                                        filter_height != 1 || filter_width != 1;
-  const bool need_im2col =
+  _need_im2col =
     input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
-  if (need_im2col)
+  if (_need_im2col)
   {
     const int input_depth = input_shape.dim(3);
     Shape im2col_shape{batches, output_height, output_width,
                        input_depth * filter_height * filter_width};
-    try
-    {
-      _im2col =
-        std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
-    }
-    catch (std::bad_alloc &ba)
-    {
-      // Failed memory allocation
-      _im2col = nullptr;
-    }
+    auto im2col = getOutputTensors()[1];
+    im2col->resize(im2col_shape);
+  }
+  else
+  {
+    auto im2col = getOutputTensors()[1];
+    im2col->set_allocatable(false);
   }
 }
 
@@ -147,14 +155,15 @@ void Conv2D::execute() const
         evalQuantizedPerChannel();
       }
       break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
     case DataType::S16:
       evalQuantizedS16();
       break;
     default:
       throw std::runtime_error("Unsupported type.");
   }
-  if (!!_im2col)
-    _im2col->deallocate();
 }
 
 void Conv2D::evalFloat() const
@@ -173,32 +182,16 @@ void Conv2D::evalFloat() const
   params.float_activation_min = activation_min;
   params.float_activation_max = activation_max;
 
-  if (_im2col)
+  float *im2col_data = nullptr;
+  auto im2col = getOutputTensors()[1];
+  if (_need_im2col)
   {
-    try
-    {
-      tflite::optimized_ops::Conv(
-        params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-        getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
-        getTensorShape(output()), getTensorData<float>(output()), getTensorShape(_im2col.get()),
-        getTensorData<float>(_im2col.get()));
-    }
-    catch (std::bad_alloc &ba)
-    {
-      // Failed memory allocation
-      _im2col->deallocate();
-
-      tflite::reference_ops::Conv(
-        params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-        getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
-        getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
-    }
+    im2col_data = im2col->data<float>();
   }
-  else
-    tflite::reference_ops::Conv(
-      params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
-      getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
-      getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), nullptr);
+  luci_interpreter_pal::Conv(
+    params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+    getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+    getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data);
 }
 
 void Conv2D::evalQuantized() const
@@ -232,16 +225,12 @@ void Conv2D::evalQuantized() const
   params.quantized_activation_min = activation_min;
   params.quantized_activation_max = activation_max;
 
-  // TODO This should only be done once (although it takes only a few microseconds).
-  //  Also, the user should be able to adjust the number of threads.
-  auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
-  gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
-
-  tflite::optimized_ops::Conv(
-    params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
-    getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
-    getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
-    getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
+  auto im2col = getOutputTensors()[1];
+  luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                             getTensorShape(filter()), getTensorData<uint8_t>(filter()),
+                             getTensorShape(bias()), getTensorData<int32_t>(bias()),
+                             getTensorShape(output()), getTensorData<uint8_t>(output()),
+                             getTensorShape(im2col), getTensorData<uint8_t>(im2col));
 }
 
 void Conv2D::evalQuantizedPerChannel() const
@@ -330,6 +319,54 @@ void Conv2D::evalQuantizedPerChannel() const
   }
 }
 
+void Conv2D::evalQuantizedS8PerChannel() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::ConvParams params{};
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  // The kernel expects filter zero points to be negated.
+  params.input_offset = -input()->zero_point(); // Note the '-'.
+  params.weights_offset = 0;                    // Unused in tflite code
+  params.output_offset = output()->zero_point();
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers =
+    quantizeMultipliers(effective_output_scales);
+
+  std::vector<int32_t> shifts;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](ChannelQuantMultipliers cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+  int8_t *im2col_data = nullptr;
+  auto im2col = getOutputTensors()[1];
+  if (_need_im2col)
+  {
+    im2col_data = im2col->data<int8_t>();
+  }
+
+  luci_interpreter_pal::ConvPerChannel(
+    params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data);
+}
+
 void Conv2D::evalQuantizedS16() const
 {
   const auto *input_data = getTensorData<int16_t>(input());
index 86f73c2..5f13176 100644 (file)
@@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams>
 {
 public:
   Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
-         const Conv2DParams &params);
+         Tensor *im2col, const Conv2DParams &params);
 
   const Tensor *input() const { return _inputs[0]; }
   const Tensor *filter() const { return _inputs[1]; }
@@ -45,10 +45,11 @@ private:
   void evalFloat() const;
   void evalQuantized() const;
   void evalQuantizedPerChannel() const;
+  void evalQuantizedS8PerChannel() const;
   void evalQuantizedS16() const;
 
 private:
-  std::unique_ptr<Tensor> _im2col;
+  bool _need_im2col = false;
   int32_t _padding_height{};
   int32_t _padding_width{};
 };
index 8610a4f..277c280 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Conv2D.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,7 +27,15 @@ namespace
 
 using namespace testing;
 
-TEST(Conv2DTest, Float)
+class Conv2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
 {
   Shape input_shape{1, 4, 3, 2};
   Shape filter_shape{2, 2, 2, 2};
@@ -44,9 +53,13 @@ TEST(Conv2DTest, Float)
     -8, -6, 7,  5,  // out = 1, row = 1
   };
   std::vector<float> bias_data{1, 2};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Conv2DParams params{};
@@ -57,8 +70,10 @@ TEST(Conv2DTest, Float)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   kernel.configure();
+  _memory_manager->allocate_memory(im2col);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -70,7 +85,55 @@ TEST(Conv2DTest, Float)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(Conv2DTest, FloatCheck)
+TEST_F(Conv2DTest, FloatPointwise)
+{
+  Shape input_shape{1, 2, 2, 2};
+  Shape filter_shape{2, 1, 1, 2};
+  Shape bias_shape{2};
+  std::vector<float> input_data{
+    1, 2, // row = 0, col = 0
+    3, 4, // row = 0, col = 1
+    5, 6, // row = 1, col = 0
+    7, 8, // row = 1, col = 1
+  };
+  std::vector<float> filter_data{
+    -1, 2, // out = 0
+    -3, 4, // out = 1
+  };
+  std::vector<float> bias_data{1, 2};
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::RELU;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(im2col);
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    4, 7,  6,  9,  // row = 0
+    8, 11, 10, 13, // row = 1
+  };
+  std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
 {
   Shape input_shape{2, 2, 4, 1};
   Shape filter_shape{3, 2, 2, 1};
@@ -89,9 +152,13 @@ TEST(Conv2DTest, FloatCheck)
     -1, -1, 1,  1, // third 2x2 filter
   };
   std::vector<float> bias_data{1, 2, 3};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Conv2DParams params{};
@@ -102,8 +169,10 @@ TEST(Conv2DTest, FloatCheck)
   params.dilation_width_factor = 1;
   params.activation = Activation::NONE;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -117,7 +186,7 @@ TEST(Conv2DTest, FloatCheck)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(Conv2DTest, Uint8)
+TEST_F(Conv2DTest, Uint8)
 {
   std::vector<float> input_data{
     // First batch
@@ -137,12 +206,15 @@ TEST(Conv2DTest, Uint8)
   std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
   std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
 
-  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first,
-                                                       input_quant_param.second, filter_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+                                  filter_data, _memory_manager.get());
   Tensor bias_tensor = makeInputTensor<DataType::S32>(
-    {3}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+    {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
 
@@ -154,8 +226,10 @@ TEST(Conv2DTest, Uint8)
   params.dilation_width_factor = 1;
   params.activation = Activation::NONE;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -169,7 +243,7 @@ TEST(Conv2DTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(Conv2DTest, Uint8_CWQ)
+TEST_F(Conv2DTest, Uint8_CWQ)
 {
   const int output_channels = 3;
   std::vector<float> input_data{
@@ -209,12 +283,14 @@ TEST(Conv2DTest, Uint8_CWQ)
     bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
   std::vector<int32_t> zerop(output_channels, 0);
 
-  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
-  Tensor filter_tensor =
-    makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 0, filter_data);
-  Tensor bias_tensor =
-    makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::U8, Shape({}), {}, "");
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
 
@@ -226,8 +302,10 @@ TEST(Conv2DTest, Uint8_CWQ)
   params.dilation_width_factor = 1;
   params.activation = Activation::NONE;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -241,7 +319,83 @@ TEST(Conv2DTest, Uint8_CWQ)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(Conv2DTest, SInt16)
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+  const int output_channels = 3;
+  std::vector<float> input_data{
+    // First batch
+    1, 1, 1, 1, // row = 1
+    2, 2, 2, 2, // row = 2
+                // Second batch
+    1, 2, 3, 4, // row = 1
+    1, 2, 3, 4, // row = 2
+  };
+  std::vector<float> filter_data{
+    1,  2,  3,  4, // first 2x2 filter
+    -1, 1,  -1, 1, // second 2x2 filter
+    -1, -1, 1,  1, // third 2x2 filter
+  };
+  std::vector<float> bias_data{1, 2, 3};
+  Shape filter_shape{output_channels, 2, 2, 1};
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+                                                       0, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S8, Shape({}), {}, "");
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  Conv2DParams params{};
+  params.padding = Padding::VALID;
+  params.stride_height = 2;
+  params.stride_width = 2;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{
+    18, 2, 5, // first batch, left
+    18, 2, 5, // first batch, right
+    17, 4, 3, // second batch, left
+    37, 4, 3, // second batch, right
+  };
+  std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, SInt16)
 {
   Shape input_shape{1, 4, 3, 2};
   Shape filter_shape{2, 2, 2, 2};
@@ -266,9 +420,13 @@ TEST(Conv2DTest, SInt16)
     0,  40, 0, 44, // row = 1
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
 
   Conv2DParams params{};
@@ -279,15 +437,17 @@ TEST(Conv2DTest, SInt16)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(Conv2DTest, SInt16_CWQ_weights)
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
 {
   Shape input_shape{1, 2, 2, 2};  // Batch x H x W x C
   Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
@@ -321,10 +481,13 @@ TEST(Conv2DTest, SInt16_CWQ_weights)
     bias_scales.push_back(filter_scales[i] * input_scale);
   std::vector<int32_t> zerop = {0, 0, 0};
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
-  Tensor filter_tensor =
-    makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+                                                        filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
+  Tensor im2col(DataType::S16, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
 
   Conv2DParams params{};
@@ -335,15 +498,17 @@ TEST(Conv2DTest, SInt16_CWQ_weights)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(im2col);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
 {
   Shape input_shape{1, 4, 3, 2};
   Shape filter_shape{2, 2, 2, 2};
@@ -361,9 +526,13 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
     -8, -6, 7,  5,  // out = 1, row = 1
   };
   std::vector<float> bias_data{1, 2};
-  Tensor input_tensor = makeInputTensor<DataType::S32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Conv2DParams params{};
@@ -374,11 +543,11 @@ TEST(Conv2DTest, Unsupported_Type_Configure_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(Conv2DTest, Invalid_Bias_Type_NEG)
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
 {
   Shape input_shape{1, 4, 3, 2};
   Shape filter_shape{2, 2, 2, 2};
@@ -396,9 +565,12 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG)
     -8, -6, 7,  5,  // out = 1, row = 1
   };
   std::vector<uint8_t> bias_data{1, 2};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Conv2DParams params{};
@@ -409,11 +581,11 @@ TEST(Conv2DTest, Invalid_Bias_Type_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(Conv2DTest, Invalid_Bias_Data_NEG)
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
 {
   Shape input_shape{1, 4, 3, 2};
   Shape filter_shape{2, 2, 2, 2};
@@ -431,9 +603,13 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG)
     -8, -6, 7,  5,  // out = 1, row = 1
   };
   std::vector<float> bias_data{1, 2, 3};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Conv2DParams params{};
@@ -444,11 +620,11 @@ TEST(Conv2DTest, Invalid_Bias_Data_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(Conv2DTest, Invalid_Input_Shape_NEG)
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
 {
   Shape input_shape{1, 4, 6, 1};
   Shape filter_shape{2, 2, 2, 2};
@@ -466,9 +642,13 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG)
     -8, -6, 7,  5,  // out = 1, row = 1
   };
   std::vector<float> bias_data{1, 2};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+  Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Conv2DParams params{};
@@ -479,7 +659,7 @@ TEST(Conv2DTest, Invalid_Input_Shape_NEG)
   params.dilation_width_factor = 1;
   params.activation = Activation::RELU;
 
-  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
index f2b9e4c..3a9acd1 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "DepthToSpace.h"
 #include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALDepthToSpace.h"
 
 namespace luci_interpreter
 {
@@ -62,14 +62,14 @@ void DepthToSpace::execute() const
   switch (input()->element_type())
   {
     case DataType::FLOAT32:
-      tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
-                                          getTensorData<float>(input()), getTensorShape(output()),
-                                          getTensorData<float>(output()));
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
       break;
     case DataType::U8:
-      tflite::optimized_ops::DepthToSpace(op_params, getTensorShape(input()),
-                                          getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                          getTensorData<uint8_t>(output()));
+      luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
       break;
     default:
       throw std::runtime_error("Unsupported Type.");
index 3dee4ad..9b1c09b 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/DepthToSpace.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -35,12 +36,14 @@ TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
 
 TYPED_TEST(DepthToSpaceTest, SimpleCase)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
   Shape input_shape{1, 1, 2, 4};
   std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
   std::vector<int32_t> output_shape{1, 2, 4, 1};
 
-  Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
 
   DepthToSpaceParams params{};
@@ -48,6 +51,7 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase)
 
   DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
@@ -57,10 +61,12 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase)
 
 TEST(DepthToSpaceTest, InvalidInputShape_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
   Shape input_shape{1, 2, 4};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthToSpaceParams params{};
@@ -72,10 +78,12 @@ TEST(DepthToSpaceTest, InvalidInputShape_NEG)
 
 TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
   Shape input_shape{1, 1, 2, 4};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   DepthToSpaceParams params{};
@@ -87,10 +95,12 @@ TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
 
 TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
   Shape input_shape{1, 1, 2, 4};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthToSpaceParams params{};
index 1452f44..f2dbf6c 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
 #include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
 
 #include <stdexcept>
 
@@ -45,7 +46,7 @@ void DepthwiseConv2D::configure()
   // (4) | int8  int8   int32 int8   | quantized per channel
   // (5) | int16 int8   int64 int16  | quantized per channel 16x8
   //
-  // We only support (1) and (3) for now, and additionally the following:
+  // We only support (1), (3) and (4) for now, and additionally the following:
   //     | input filter bias  output |
   // ----+---------------------------+
   // (5) | int16 int16  int64 int16  |
@@ -58,6 +59,17 @@ void DepthwiseConv2D::configure()
   {
     LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
   }
+  else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+    LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+                           filter()->scales().size());
+    for (auto zerop : filter()->zero_points())
+    {
+      LUCI_INTERPRETER_CHECK(zerop == 0);
+    }
+    LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+  }
   else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
   {
     LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
@@ -123,6 +135,9 @@ void DepthwiseConv2D::execute() const
         evalQuantizedPerChannel();
       }
       break;
+    case DataType::S8:
+      evalQuantizedS8PerChannel();
+      break;
     case DataType::S16:
       evalQuantizedS16();
       break;
@@ -283,6 +298,52 @@ void DepthwiseConv2D::evalQuantized() const
     getTensorShape(output()), getTensorData<uint8_t>(output()));
 }
 
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+  int32_t activation_min{};
+  int32_t activation_max{};
+  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+  tflite::DepthwiseParams params{};
+
+  params.padding_type = tflite::PaddingType::kSame;
+  params.padding_values.height = _padding_height;
+  params.padding_values.width = _padding_width;
+  params.stride_height = _params.stride_height;
+  params.stride_width = _params.stride_width;
+  params.dilation_height_factor = _params.dilation_height_factor;
+  params.dilation_width_factor = _params.dilation_width_factor;
+  params.depth_multiplier = _params.depth_multiplier;
+  // The kernel expects input and filter zero points to be negated.
+  params.input_offset = -input()->zero_point(); // Note the '-'.
+  params.weights_offset = 0;
+  params.output_offset = output()->zero_point();
+  params.output_multiplier = 1; // unused in tflite code
+  params.output_shift = 0;      // unused in tflite code
+  params.quantized_activation_min = activation_min;
+  params.quantized_activation_max = activation_max;
+
+  const std::vector<double> effective_output_scales =
+    getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+  std::vector<ChannelQuantMultipliers> quant_multipliers =
+    quantizeMultipliers(effective_output_scales);
+
+  std::vector<int32_t> shifts;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+                 [](ChannelQuantMultipliers cm) { return cm.shift; });
+  std::vector<int32_t> multipliers;
+  std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+                 std::back_inserter(multipliers),
+                 [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+  tflite::reference_integer_ops::DepthwiseConvPerChannel(
+    params, multipliers.data(), shifts.data(), getTensorShape(input()),
+    getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+    getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+    getTensorData<int8_t>(output()));
+}
+
 void DepthwiseConv2D::evalQuantizedS16() const
 {
   const auto *input_data = getTensorData<int16_t>(input());
index 6d700dd..6cffd65 100644 (file)
@@ -43,6 +43,7 @@ private:
   void evalFloat() const;
   void evalQuantized() const;
   void evalQuantizedPerChannel() const;
+  void evalQuantizedS8PerChannel() const;
   void evalQuantizedS16() const;
 
 private:
index 3e2f434..7497589 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/DepthwiseConv2D.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,7 +27,15 @@ namespace
 
 using namespace testing;
 
-TEST(DepthwiseConv2DTest, Float)
+class DepthwiseConv2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DepthwiseConv2DTest, Float)
 {
   Shape input_shape{1, 4, 2, 2};
   Shape filter_shape{1, 2, 2, 4};
@@ -44,9 +53,12 @@ TEST(DepthwiseConv2DTest, Float)
     13, -14, 15,  -16, //
   };
   std::vector<float> bias_data{1, 2, 3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthwiseConv2DParams params{};
@@ -60,6 +72,7 @@ TEST(DepthwiseConv2DTest, Float)
 
   DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -70,7 +83,7 @@ TEST(DepthwiseConv2DTest, Float)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
 }
 
-TEST(DepthwiseConv2DTest, Uint8)
+TEST_F(DepthwiseConv2DTest, Uint8)
 {
   std::vector<float> input_data{
     1, 2, 7,  8,  // column 1
@@ -88,12 +101,14 @@ TEST(DepthwiseConv2DTest, Uint8)
   std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
   std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
 
-  Tensor input_tensor = makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first,
-                                                       input_quant_param.second, filter_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
+                                  filter_data, _memory_manager.get());
   Tensor bias_tensor = makeInputTensor<DataType::S32>(
-    {4}, input_quant_param.first * input_quant_param.first, 0, bias_data);
+    {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
 
@@ -108,6 +123,7 @@ TEST(DepthwiseConv2DTest, Uint8)
 
   DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -118,7 +134,7 @@ TEST(DepthwiseConv2DTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
 }
 
-TEST(DepthwiseConv2DTest, SInt16)
+TEST_F(DepthwiseConv2DTest, SInt16)
 {
   Shape input_shape{1, 4, 2, 2};
   Shape filter_shape{1, 2, 2, 4};
@@ -143,9 +159,12 @@ TEST(DepthwiseConv2DTest, SInt16)
     167, 0, 227, 28, //
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
 
   DepthwiseConv2DParams params{};
@@ -159,13 +178,14 @@ TEST(DepthwiseConv2DTest, SInt16)
 
   DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(DepthwiseConv2DTest, SInt16_CWQ_weights)
+TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
 {
   const int output_channels = 4;
   Shape input_shape{1, 4, 2, 2};
@@ -197,10 +217,12 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights)
   for (int i = 0; i < output_channels; ++i)
     bias_scales.push_back(filter_scales[i] * input_scale);
   std::vector<int32_t> zerop(4, 0);
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
-  Tensor filter_tensor =
-    makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
+                                                        filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
 
   DepthwiseConv2DParams params{};
@@ -214,13 +236,14 @@ TEST(DepthwiseConv2DTest, SInt16_CWQ_weights)
 
   DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
+TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
 {
   const int output_channels = 4;
   Shape input_shape{1, 3, 2, 2};
@@ -267,11 +290,13 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
     bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
   std::vector<int32_t> zerop(output_channels, 0);
 
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
-  Tensor filter_tensor =
-    makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops, 3, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+                                                       3, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
 
@@ -286,6 +311,7 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
 
   DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -293,7 +319,83 @@ TEST(DepthwiseConv2DTest, Uint8_CWQ_weights)
               FloatArrayNear(ref_output_data, output_quant_param.first));
 }
 
-TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+  const int output_channels = 4;
+  Shape input_shape{1, 3, 2, 2};
+  Shape filter_shape{1, 2, 2, output_channels};
+  Shape bias_shape{4};
+  std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+  std::vector<float> input_data{
+    1, 2, 7,  8,  //
+    3, 4, 9,  10, //
+    5, 6, 11, 12, //
+  };
+  std::vector<float> filter_data{
+    1,  2,   3,   4,   //
+    -9, 10,  -11, 12,  //
+    5,  6,   7,   8,   //
+    13, -14, 15,  -16, //
+  };
+  std::vector<float> bias_data{1, 2, 3, 4};
+  std::vector<float> ref_output_data{
+    71, -34, 99,  -20, //
+    91, -26, 127, -4,  //
+  };
+
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+  std::vector<std::pair<float, int32_t>> filter_quant_params;
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+  filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+  std::vector<float> filter_scales;
+  std::vector<int32_t> filter_zerops;
+  for (auto iter : filter_quant_params)
+  {
+    filter_scales.push_back(iter.first);
+    filter_zerops.push_back(iter.second);
+  }
+
+  std::vector<float> bias_scales;
+  for (int i = 0; i < output_channels; ++i)
+    bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+  std::vector<int32_t> zerop(output_channels, 0);
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+                                                       3, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      _memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  DepthwiseConv2DParams params{};
+  params.padding = Padding::VALID;
+  params.depth_multiplier = 2;
+  params.stride_height = 1;
+  params.stride_width = 1;
+  params.dilation_height_factor = 1;
+  params.dilation_width_factor = 1;
+  params.activation = Activation::NONE;
+
+  DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
 {
   Shape input_shape{1, 4, 2, 2};
   Shape filter_shape{1, 2, 2, 4};
@@ -311,9 +413,11 @@ TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
     13, -14, 15,  -16, //
   };
   std::vector<int32_t> bias_data{1, 2, 3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthwiseConv2DParams params{};
@@ -329,7 +433,7 @@ TEST(DepthwiseConv2DTest, InvalidBiasType_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
 {
   Shape input_shape{1, 4, 2, 2};
   Shape filter_shape{1, 2, 2, 4};
@@ -347,9 +451,12 @@ TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
     13, -14, 15,  -16, //
   };
   std::vector<float> bias_data{1, 2, 3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   DepthwiseConv2DParams params{};
@@ -365,7 +472,7 @@ TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(DepthwiseConv2DTest, InvalidInputShape_NEG)
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
 {
   Shape input_shape{4, 2, 2};
   Shape filter_shape{2, 2, 4};
@@ -383,9 +490,12 @@ TEST(DepthwiseConv2DTest, InvalidInputShape_NEG)
     13, -14, 15,  -16, //
   };
   std::vector<float> bias_data{1, 2, 3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthwiseConv2DParams params{};
@@ -401,7 +511,7 @@ TEST(DepthwiseConv2DTest, InvalidInputShape_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
 {
   Shape input_shape{1, 4, 2, 2};
   Shape filter_shape{2, 1, 2, 4};
@@ -419,9 +529,12 @@ TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG)
     13, -14, 15,  -16, //
   };
   std::vector<float> bias_data{1, 2, 3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthwiseConv2DParams params{};
@@ -437,7 +550,7 @@ TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
 {
   Shape input_shape{1, 4, 2, 2};
   Shape filter_shape{1, 2, 4, 2};
@@ -455,9 +568,12 @@ TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG)
     13, -14, 15,  -16, //
   };
   std::vector<float> bias_data{1, 2, 3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DepthwiseConv2DParams params{};
index db1496d..0e52ba1 100644 (file)
@@ -18,7 +18,8 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
 
 namespace luci_interpreter
 {
index 1a0c4af..021d68d 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Div.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,6 +28,14 @@ namespace
 
 using namespace testing;
 
+class DivTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
 float GetTolerance(float min, float max)
 {
   const float kQuantizedStep = (max - min) / 255.0f;
@@ -34,7 +43,7 @@ float GetTolerance(float min, float max)
   return kQuantizedTolerance;
 }
 
-TEST(DivTest, Float)
+TEST_F(DivTest, Float)
 {
   Shape base_shape = {2, 3, 1, 1};
 
@@ -44,8 +53,10 @@ TEST(DivTest, Float)
   std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f};
   std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f};
 
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
@@ -54,13 +65,14 @@ TEST(DivTest, Float)
 
   Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
 }
 
-TEST(DivTest, FloatBroadcast)
+TEST_F(DivTest, FloatBroadcast)
 {
   Shape input1_shape = {1, 3};
   Shape input2_shape = {3, 1};
@@ -69,8 +81,10 @@ TEST(DivTest, FloatBroadcast)
   std::vector<float> input2_data{0.2f, 1.6f, 0.5f};
   std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f};
 
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
@@ -79,12 +93,13 @@ TEST(DivTest, FloatBroadcast)
 
   Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
 }
 
-TEST(DivTest, Uint8)
+TEST_F(DivTest, Uint8)
 {
   Shape base_shape = {1, 2, 2, 1};
 
@@ -98,10 +113,10 @@ TEST(DivTest, Uint8)
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
 
-  Tensor input1_tensor =
-    makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data);
-  Tensor input2_tensor =
-    makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data);
+  Tensor input1_tensor = makeInputTensor<DataType::U8>(
+    base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::U8>(
+    base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get());
 
   Tensor output_tensor =
     makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
@@ -111,6 +126,7 @@ TEST(DivTest, Uint8)
 
   Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -118,10 +134,10 @@ TEST(DivTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
 }
 
-TEST(DivTest, Input_Output_Type_NEG)
+TEST_F(DivTest, Input_Output_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   DivParams params{};
@@ -131,10 +147,10 @@ TEST(DivTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(DivTest, Invalid_Input_Type_NEG)
+TEST_F(DivTest, Invalid_Input_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
-  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   DivParams params{};
@@ -142,6 +158,7 @@ TEST(DivTest, Invalid_Input_Type_NEG)
 
   Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 4563960..697d63b 100644 (file)
@@ -17,7 +17,7 @@
 #include "kernels/Elu.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALElu.h"
 
 #include <stdexcept>
 
@@ -40,8 +40,8 @@ void Elu::execute() const
   switch (input()->element_type())
   {
     case DataType::FLOAT32:
-      tflite::optimized_ops::Elu(getTensorShape(input()), getTensorData<float>(input()),
-                                 getTensorShape(output()), getTensorData<float>(output()));
+      luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+                                getTensorShape(output()), getTensorData<float>(output()));
       break;
     default:
       throw std::runtime_error("Unsupported type.");
index e26eed0..814499c 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Elu.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -29,11 +30,14 @@ using namespace testing;
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Elu kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   (void)output_shape;
@@ -58,12 +62,14 @@ TEST(EluTest, SimpleElu)
 
 TEST(EluTest, InOutTypeMismatch_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     0, -6, 2,  -4,   //
     3, -2, 10, -0.1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Elu kernel(&input_tensor, &output_tensor);
index 69b3be7..11f025e 100644 (file)
@@ -42,9 +42,9 @@ private:
 
 private:
   int32_t _x_multiplier = 0;
-  int32_t _x_shift = 0;
+  int _x_shift = 0;
   int32_t _y_multiplier = 0;
-  int32_t _y_shift = 0;
+  int _y_shift = 0;
 };
 
 } // namespace kernels
index ba2827b..46a0f97 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Equal.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(EqualTest, FloatSimple)
+class EqualTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(EqualTest, FloatSimple)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(EqualTest, FloatSimple)
     false, true, false, // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Equal kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(EqualTest, FloatBroardcast)
+TEST_F(EqualTest, FloatBroardcast)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -76,12 +86,13 @@ TEST(EqualTest, FloatBroardcast)
     true,  true,  true,  // Row 4
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Equal kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -92,7 +103,7 @@ TEST(EqualTest, FloatBroardcast)
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
 
-TEST(EqualTest, Uint8Quantized)
+TEST_F(EqualTest, Uint8Quantized)
 {
   std::vector<float> x_data{
     0.5, 0.5, 0.7,  0.9, // Row 1
@@ -110,24 +121,25 @@ TEST(EqualTest, Uint8Quantized)
   };
 
   std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
 
   std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Equal kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(EqualTest, Uint8QuantizedBroadcast)
+TEST_F(EqualTest, Uint8QuantizedBroadcast)
 {
   std::vector<float> x_data{
     0.4,  -0.8, 0.7,  0.3, // Row 1
@@ -148,34 +160,35 @@ TEST(EqualTest, Uint8QuantizedBroadcast)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Equal kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(EqualTest, Input_Type_Mismatch_NEG)
+TEST_F(EqualTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Equal kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(EqualTest, Input_Output_Type_NEG)
+TEST_F(EqualTest, Input_Output_Type_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Equal kernel(&x_tensor, &y_tensor, &output_tensor);
index f7b115a..e7c560a 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
 
 namespace luci_interpreter
 {
index 19b2c14..a159d9d 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Exp.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -29,13 +30,16 @@ using namespace testing;
 
 TEST(ExpTest, Float)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   Shape input_shape{1, 1, 7};
   std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Exp kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<int32_t> ref_output_shape{1, 1, 7};
index d90d611..30076fb 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Floor.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,7 +27,15 @@ namespace
 
 using namespace testing;
 
-TEST(FloorTest, SimpleFloat)
+class FloorTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorTest, SimpleFloat)
 {
   std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
@@ -40,20 +49,22 @@ TEST(FloorTest, SimpleFloat)
     3, 7, 10, -1, // Row 2
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Floor kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(FloorTest, Input_Output_Type_NEG)
+TEST_F(FloorTest, Input_Output_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
   Floor kernel(&input_tensor, &output_tensor);
index 16831ca..3e1b5f1 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/FloorDiv.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(FloorDivTest, FloatSimple)
+class FloorDivTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorDivTest, FloatSimple)
 {
   Shape x_shape{2, 3};
   std::vector<float> x_data{
@@ -47,12 +56,13 @@ TEST(FloorDivTest, FloatSimple)
     1, 1, 1, // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -60,7 +70,7 @@ TEST(FloorDivTest, FloatSimple)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(FloorDivTest, FloatBroadcast)
+TEST_F(FloorDivTest, FloatBroadcast)
 {
   Shape x_shape{1, 3};
   std::vector<float> x_data{
@@ -81,12 +91,13 @@ TEST(FloorDivTest, FloatBroadcast)
     1, 3,  -4, // Row 3
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -94,36 +105,37 @@ TEST(FloorDivTest, FloatBroadcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(FloorDivTest, DivByZero_NEG)
+TEST_F(FloorDivTest, DivByZero_NEG)
 {
   Shape shape{3};
   std::vector<float> x_data{1, 0, -1};
   std::vector<float> y_data{0, 0, 0};
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
 
   EXPECT_ANY_THROW(kernel.execute());
 }
 
-TEST(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(FloorDivTest, Input_Type_Mismatch_NEG)
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
index 48433b4..cfe8f8b 100644 (file)
@@ -19,6 +19,7 @@
 #include "kernels/Utils.h"
 
 #include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
 
 #include <stdexcept>
 
@@ -48,6 +49,12 @@ void FullyConnected::configure()
     LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
     LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32)
   }
+  else if (weights()->element_type() == DataType::S8)
+  {
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+    LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32)
+  }
   else
   {
     throw std::runtime_error("Unsupported type.");
@@ -77,6 +84,9 @@ void FullyConnected::execute() const
     case DataType::U8:
       evalQuantized();
       break;
+    case DataType::S8:
+      evalQuantizedS8();
+      break;
     case DataType::FLOAT32:
       evalFloat();
       break;
@@ -135,5 +145,38 @@ void FullyConnected::evalQuantized() const
     getTensorShape(output()), getTensorData<uint8_t>(output()));
 }
 
+void FullyConnected::evalQuantizedS8() const
+{
+  double real_multiplier = 0.0;
+  int output_shift;
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+  int32_t output_multiplier;
+  real_multiplier =
+    getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+  calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+                                    &output_activation_max);
+
+  int32_t input_offset = -input()->zero_point();
+  int32_t filter_offset = -weights()->zero_point();
+  int32_t output_offset = output()->zero_point();
+
+  tflite::FullyConnectedParams op_params{};
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = output_multiplier;
+  op_params.output_shift = output_shift;
+  op_params.quantized_activation_min = output_activation_min;
+  op_params.quantized_activation_max = output_activation_max;
+  op_params.lhs_cacheable = false;
+  op_params.rhs_cacheable = false;
+  tflite::reference_integer_ops::FullyConnected(
+    op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
+    getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+    getTensorShape(output()), getTensorData<int8_t>(output()));
+}
+
 } // namespace kernels
 } // namespace luci_interpreter
index 204f11e..2a7c068 100644 (file)
@@ -42,6 +42,7 @@ public:
 private:
   void evalFloat() const;
   void evalQuantized() const;
+  void evalQuantizedS8() const;
 };
 
 } // namespace kernels
index 0259d3e..b0eda01 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/FullyConnected.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -32,9 +33,13 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
            std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FullyConnectedParams params{};
@@ -42,6 +47,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
 
   FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -49,21 +55,63 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
 }
 
 template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+                   std::initializer_list<int32_t> weights_shape,
+                   std::initializer_list<int32_t> bias_shape,
+                   std::initializer_list<int32_t> output_shape,
+                   std::initializer_list<float> input_data,
+                   std::initializer_list<float> weights_data,
+                   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  const float quantized_tolerance = getTolerance(-127, 128, 255);
+  std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+  std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
+  Tensor output_tensor =
+    makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+  FullyConnectedParams params{};
+  params.activation = Activation::RELU;
+
+  FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+  kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+  EXPECT_THAT(dequantizeTensorData(output_tensor),
+              FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <>
 void Check<uint8_t>(
   std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
   std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
   std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
   std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   const float quantized_tolerance = getTolerance(-127, 128, 255);
   std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
   std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
-  Tensor weights_tensor = makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first,
-                                                        input_quant_param.second, weights_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S32>(
-    bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+                                  weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+                                   bias_data, memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
 
@@ -72,6 +120,7 @@ void Check<uint8_t>(
 
   FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -83,7 +132,7 @@ template <typename T> class FullyConnectedTest : public ::testing::Test
 {
 };
 
-using DataTypes = ::testing::Types<float, uint8_t>;
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
 TYPED_TEST_CASE(FullyConnectedTest, DataTypes);
 
 TYPED_TEST(FullyConnectedTest, Simple)
@@ -121,9 +170,13 @@ TEST(FullyConnectedTest, InvalidBiasType_NEG)
   Shape bias_shape{3};
   std::vector<int32_t> bias_data{-1, -5, -8};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FullyConnectedParams params{};
@@ -149,9 +202,14 @@ TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
   Shape bias_shape{3};
   std::vector<float> bias_data{-1, -5, -8};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FullyConnectedParams params{};
@@ -180,9 +238,14 @@ TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
   Shape bias_shape{3};
   std::vector<float> bias_data{-1, -5, -8};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
-  Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor weights_tensor =
+    makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   FullyConnectedParams params{};
index a65d29f..877c139 100644 (file)
@@ -42,9 +42,9 @@ private:
 
 private:
   int32_t _x_multiplier = 0;
-  int32_t _x_shift = 0;
+  int _x_shift = 0;
   int32_t _y_multiplier = 0;
-  int32_t _y_shift = 0;
+  int _y_shift = 0;
 };
 
 } // namespace kernels
index 3fcc866..ba3925f 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Greater.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(GreaterTest, FloatSimple)
+class GreaterTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterTest, FloatSimple)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(GreaterTest, FloatSimple)
     true,  false, false, // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(GreaterTest, FloatBroardcast)
+TEST_F(GreaterTest, FloatBroardcast)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(GreaterTest, FloatBroardcast)
     false, false, true,  // Row 3
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(GreaterTest, FloatBroardcast)
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
 
-TEST(GreaterTest, Uint8Quantized)
+TEST_F(GreaterTest, Uint8Quantized)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(GreaterTest, Uint8Quantized)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(GreaterTest, Uint8QuantizedRescale)
+TEST_F(GreaterTest, Uint8QuantizedRescale)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(GreaterTest, Uint8QuantizedRescale)
   std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
   std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
 
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(GreaterTest, Uint8QuantizedBroadcast)
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
 {
   std::vector<float> x_data{
     0.4,  -0.8, 0.7,  0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(GreaterTest, Uint8QuantizedBroadcast)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(GreaterTest, Input_Type_Mismatch_NEG)
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(GreaterTest, Input_Output_Type_NEG)
+TEST_F(GreaterTest, Input_Output_Type_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Greater kernel(&x_tensor, &y_tensor, &output_tensor);
index e948d69..4a0f487 100644 (file)
@@ -42,9 +42,9 @@ private:
 
 private:
   int32_t _x_multiplier = 0;
-  int32_t _x_shift = 0;
+  int _x_shift = 0;
   int32_t _y_multiplier = 0;
-  int32_t _y_shift = 0;
+  int _y_shift = 0;
 };
 
 } // namespace kernels
index 7c79d8a..a9d1723 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/GreaterEqual.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(GreaterEqualTest, FloatSimple)
+class GreaterEqualTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterEqualTest, FloatSimple)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(GreaterEqualTest, FloatSimple)
     true,  true, false, // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(GreaterEqualTest, FloatBroardcast)
+TEST_F(GreaterEqualTest, FloatBroardcast)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(GreaterEqualTest, FloatBroardcast)
     false, false, true,  // Row 3
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(GreaterEqualTest, FloatBroardcast)
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
 
-TEST(GreaterEqualTest, Uint8Quantized)
+TEST_F(GreaterEqualTest, Uint8Quantized)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(GreaterEqualTest, Uint8Quantized)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(GreaterEqualTest, Uint8QuantizedRescale)
+TEST_F(GreaterEqualTest, Uint8QuantizedRescale)
 {
   std::vector<float> x_data{
     0.5, 0.5, 0.7,  0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(GreaterEqualTest, Uint8QuantizedRescale)
   std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
   std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
 
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(GreaterEqualTest, Uint8QuantizedBroadcast)
+TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast)
 {
   std::vector<float> x_data{
     0.4,  -0.8, 0.7,  0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(GreaterEqualTest, Uint8QuantizedBroadcast)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(GreaterEqualTest, Input_Type_Mismatch_NEG)
+TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(GreaterEqualTest, Input_Output_Type_NEG)
+TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
index a267f62..971708b 100644 (file)
@@ -68,6 +68,8 @@ void If::execute() const
 
     const int32_t num_elements = input(i)->shape().num_elements();
     const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+    // TODO: Think about how to allocate memory for output in main graph
+    active_graph->configureAllocations(graph_inputs[i]);
     std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
   }
 
@@ -78,6 +80,8 @@ void If::execute() const
   {
     LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
     output(i)->resize(graph_outputs[i]->shape());
+    // TODO: Think about how to allocate memory for output in main graph
+    active_graph->configureAllocations(output(i));
 
     const int32_t num_elements = output(i)->shape().num_elements();
     const std::size_t element_size = getDataTypeSize(output(i)->element_type());
index 0dba310..c5f4faf 100644 (file)
@@ -21,6 +21,8 @@
 #include "kernels/Mul.h"
 #include "kernels/TestUtils.h"
 
+#include "luci_interpreter/TestMemoryManager.h"
+
 namespace luci_interpreter
 {
 namespace kernels
@@ -30,9 +32,17 @@ namespace
 
 using namespace testing;
 
-RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
+class IfTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
 {
-  RuntimeGraph *graph = module->addGraph();
+  RuntimeGraph *graph = module->addGraph(memory_manager);
   Tensor *input1 = graph->addTensor(
     std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
   Tensor *input2 = graph->addTensor(
@@ -40,6 +50,10 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
   Tensor *output = graph->addTensor(
     std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
 
+  memory_manager->allocate_memory(*input1);
+  memory_manager->allocate_memory(*input2);
+  memory_manager->allocate_memory(*output);
+
   graph->setInputTensors({input1, input2});
   graph->setOutputTensors({output});
 
@@ -50,9 +64,9 @@ RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
   return graph;
 }
 
-RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
 {
-  RuntimeGraph *graph = module->addGraph();
+  RuntimeGraph *graph = module->addGraph(memory_manager);
   Tensor *input1 = graph->addTensor(
     std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
   Tensor *input2 = graph->addTensor(
@@ -60,6 +74,10 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
   Tensor *output = graph->addTensor(
     std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
 
+  memory_manager->allocate_memory(*input1);
+  memory_manager->allocate_memory(*input2);
+  memory_manager->allocate_memory(*output);
+
   graph->setInputTensors({input1, input2});
   graph->setOutputTensors({output});
 
@@ -70,67 +88,69 @@ RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
   return graph;
 }
 
-TEST(IfTest, CondTrue)
+TEST_F(IfTest, CondTrue)
 {
-  Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true});
-  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
-  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+  Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
   Tensor output = makeOutputTensor(DataType::FLOAT32);
 
   RuntimeModule module(nullptr);
-  RuntimeGraph *then_graph = buildAddSubgraph(&module);
-  RuntimeGraph *else_graph = buildMulSubgraph(&module);
+  RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+  RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
 
   If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
   kernel.configure();
+  _memory_manager->allocate_memory(output);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
 }
 
-TEST(IfTest, CondFalse)
+TEST_F(IfTest, CondFalse)
 {
-  Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false});
-  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
-  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+  Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
   Tensor output = makeOutputTensor(DataType::FLOAT32);
 
   RuntimeModule module(nullptr);
-  RuntimeGraph *then_graph = buildAddSubgraph(&module);
-  RuntimeGraph *else_graph = buildMulSubgraph(&module);
+  RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+  RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
 
   If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
   kernel.configure();
+  _memory_manager->allocate_memory(output);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
 }
 
-TEST(IfTest, InvalidCondType_NEG)
+TEST_F(IfTest, InvalidCondType_NEG)
 {
-  Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1});
-  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
-  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+  Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
   Tensor output = makeOutputTensor(DataType::FLOAT32);
 
   RuntimeModule module(nullptr);
-  RuntimeGraph *then_graph = buildAddSubgraph(&module);
-  RuntimeGraph *else_graph = buildMulSubgraph(&module);
+  RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+  RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
 
   If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(IfTest, InvalidCondElementNum_NEG)
+TEST_F(IfTest, InvalidCondElementNum_NEG)
 {
-  Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true});
-  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
-  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+  Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
   Tensor output = makeOutputTensor(DataType::FLOAT32);
 
   RuntimeModule module(nullptr);
-  RuntimeGraph *then_graph = buildAddSubgraph(&module);
-  RuntimeGraph *else_graph = buildMulSubgraph(&module);
+  RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+  RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
 
   If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
   EXPECT_ANY_THROW(kernel.configure());
index 1d4ccb4..04400c3 100644 (file)
@@ -15,6 +15,7 @@
  */
 #include "kernels/InstanceNorm.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -24,11 +25,21 @@ namespace
 {
 
 using namespace testing;
-TEST(InstanceNormTest, Simple)
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1});
-  Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1});
-  Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2});
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+  Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   InstanceNormParams params{};
@@ -37,17 +48,19 @@ TEST(InstanceNormTest, Simple)
 
   InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
 }
 
-TEST(InstanceNormTest, Single_gamma_beta)
+TEST_F(InstanceNormTest, Single_gamma_beta)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1});
-  Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1});
-  Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2});
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+  Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+  Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   InstanceNormParams params{};
@@ -56,17 +69,19 @@ TEST(InstanceNormTest, Single_gamma_beta)
 
   InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
 }
 
-TEST(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1});
-  Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1});
-  Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2});
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+  Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+  Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   InstanceNormParams params{};
index 2eaf540..6422295 100644 (file)
@@ -17,7 +17,7 @@
 #include "kernels/L2Normalize.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALL2Normalize.h"
 
 #include <stdexcept>
 
@@ -66,9 +66,9 @@ template <typename T> void L2Normalize::eval(int32_t zero_point) const
 {
   tflite::L2NormalizationParams op_params{};
   op_params.input_zero_point = zero_point;
-  tflite::optimized_ops::L2Normalization(op_params, getTensorShape(input()),
-                                         getTensorData<T>(input()), getTensorShape(output()),
-                                         getTensorData<T>(output()));
+  luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+                                        getTensorData<T>(input()), getTensorShape(output()),
+                                        getTensorData<T>(output()));
 }
 
 } // namespace kernels
index 6281b45..1e565e3 100644 (file)
@@ -16,6 +16,7 @@
  */
 #include "kernels/L2Normalize.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -30,7 +31,9 @@ template <typename T>
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   L2NormParams params{};
@@ -38,6 +41,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
 
   L2Normalize kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -50,12 +54,13 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
                     std::initializer_list<float> input_data,
                     std::initializer_list<float> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::pair<float, int32_t> quant_param =
     quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
                                 std::max(input_data) > 0 ? std::max(input_data) : 0.f);
 
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
 
   L2NormParams params{};
@@ -63,6 +68,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
 
   L2Normalize kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -85,9 +91,11 @@ TYPED_TEST(L2NormalizeTest, Simple)
 
 TEST(L2NormalizeTest, ActivationType_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   L2NormParams params{};
@@ -99,9 +107,11 @@ TEST(L2NormalizeTest, ActivationType_NEG)
 
 TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
 
-  Tensor input_tensor = makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
 
   L2NormParams params{};
index 5bf3ba5..5a88808 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALL2Pool2D.h"
 
 #include <stdexcept>
 
@@ -75,9 +75,9 @@ void L2Pool2D::execute() const
       op_params.padding_values.width = _padding_width;
       op_params.float_activation_min = activation_min;
       op_params.float_activation_max = activation_max;
-      tflite::optimized_ops::L2Pool(op_params, getTensorShape(input()),
-                                    getTensorData<float>(input()), getTensorShape(output()),
-                                    getTensorData<float>(output()));
+      luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+                                   getTensorData<float>(input()), getTensorShape(output()),
+                                   getTensorData<float>(output()));
       break;
     default:
       throw std::runtime_error("Unsupported type.");
index 52f426a..289742a 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/L2Pool2D.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,14 +28,23 @@ namespace
 
 using namespace testing;
 
-TEST(L2Pool2DTest, FloatNone)
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     0, 6, 2,  4, //
     3, 2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -47,6 +57,7 @@ TEST(L2Pool2DTest, FloatNone)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{3.5, 6.5};
@@ -54,14 +65,15 @@ TEST(L2Pool2DTest, FloatNone)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, FloatRelu)
+TEST_F(L2Pool2DTest, FloatRelu)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     -1, -6, 2,  4, //
     -3, -2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -74,6 +86,7 @@ TEST(L2Pool2DTest, FloatRelu)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{3.53553, 6.5};
@@ -81,14 +94,15 @@ TEST(L2Pool2DTest, FloatRelu)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, FloatRelu1)
+TEST_F(L2Pool2DTest, FloatRelu1)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     -0.1, -0.6, 2,  4, //
     -0.3, -0.2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -101,6 +115,7 @@ TEST(L2Pool2DTest, FloatRelu1)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0.353553, 1.0};
@@ -108,14 +123,15 @@ TEST(L2Pool2DTest, FloatRelu1)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, FloatRelu6)
+TEST_F(L2Pool2DTest, FloatRelu6)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     -0.1, -0.6, 2,  4, //
     -0.3, -0.2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -128,6 +144,7 @@ TEST(L2Pool2DTest, FloatRelu6)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0.353553, 6.0};
@@ -135,14 +152,15 @@ TEST(L2Pool2DTest, FloatRelu6)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, FloatPaddingSame)
+TEST_F(L2Pool2DTest, FloatPaddingSame)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     0, 6, 2,  4, //
     3, 2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -155,6 +173,7 @@ TEST(L2Pool2DTest, FloatPaddingSame)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{3.5, 6.5};
@@ -162,14 +181,15 @@ TEST(L2Pool2DTest, FloatPaddingSame)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, FloatPaddingSameStride)
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     0, 6, 2,  4, //
     3, 2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -182,6 +202,7 @@ TEST(L2Pool2DTest, FloatPaddingSameStride)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
@@ -189,14 +210,15 @@ TEST(L2Pool2DTest, FloatPaddingSameStride)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, FloatPaddingValidStride)
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     0, 6, 2,  4, //
     3, 2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -209,6 +231,7 @@ TEST(L2Pool2DTest, FloatPaddingValidStride)
 
   L2Pool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{3.5, 6.0, 6.5};
@@ -216,14 +239,15 @@ TEST(L2Pool2DTest, FloatPaddingValidStride)
   // TODO make a Shape checking of output_tensor.
 }
 
-TEST(L2Pool2DTest, InvalidInputShape_NEG)
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
 {
   Shape input_shape{1, 2, 4};
   std::vector<float> input_data{
     0, 6, 2,  4, //
     3, 2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -238,14 +262,15 @@ TEST(L2Pool2DTest, InvalidInputShape_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(L2Pool2DTest, InvalidInputOutputType_NEG)
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
 {
   Shape input_shape{1, 2, 4};
   std::vector<float> input_data{
     0, 6, 2,  4, //
     3, 2, 10, 7, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Pool2DParams params{};
index f468da5..3833a55 100644 (file)
@@ -18,8 +18,9 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
 
 #include <stdexcept>
 
@@ -66,9 +67,8 @@ void LeakyRelu::evalFloat() const
 {
   tflite::LeakyReluParams op_params{};
   op_params.alpha = params().alpha;
-  tflite::optimized_ops::LeakyRelu(op_params, getTensorShape(input()),
-                                   getTensorData<float>(input()), getTensorShape(output()),
-                                   getTensorData<float>(output()));
+  luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
 }
 
 void LeakyRelu::evalQuantized() const
index b5cc3e7..6ec8a34 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/LeakyRelu.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -31,8 +32,10 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<float> input_data, std::initializer_list<float> output_data,
            float alpha)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   LeakyReluParams params{};
@@ -41,6 +44,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
   LeakyRelu kernel(&input_tensor, &output_tensor, params);
 
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -53,10 +57,11 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
                     std::initializer_list<float> input_data,
                     std::initializer_list<float> output_data, float alpha)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   LeakyReluParams params{};
@@ -65,6 +70,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
   LeakyRelu kernel(&input_tensor, &output_tensor, params);
 
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -99,10 +105,13 @@ TYPED_TEST(LeakReluTest, Simple)
 
 TEST(LeakReluTest, IvalidInputOutputType_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, {
-                                                                     0.0f, 1.0f, 3.0f,   // Row 1
-                                                                     1.0f, -1.0f, -2.0f, // Row 2
-                                                                   });
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+                                                           {
+                                                             0.0f, 1.0f, 3.0f,   // Row 1
+                                                             1.0f, -1.0f, -2.0f, // Row 2
+                                                           },
+                                                           memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   LeakyReluParams params{};
index fe03e10..293740e 100644 (file)
@@ -42,9 +42,9 @@ private:
 
 private:
   int32_t _x_multiplier = 0;
-  int32_t _x_shift = 0;
+  int _x_shift = 0;
   int32_t _y_multiplier = 0;
-  int32_t _y_shift = 0;
+  int _y_shift = 0;
 };
 
 } // namespace kernels
index 2972bd5..e9d09b2 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Less.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(LessTest, FloatSimple)
+class LessTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessTest, FloatSimple)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(LessTest, FloatSimple)
     false, false, true,  // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(LessTest, FloatBroardcast)
+TEST_F(LessTest, FloatBroardcast)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(LessTest, FloatBroardcast)
     true,  true,  false, // Row 3
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(LessTest, FloatBroardcast)
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
 
-TEST(LessTest, Uint8Quantized)
+TEST_F(LessTest, Uint8Quantized)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(LessTest, Uint8Quantized)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(LessTest, Uint8QuantizedRescale)
+TEST_F(LessTest, Uint8QuantizedRescale)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(LessTest, Uint8QuantizedRescale)
   std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
   std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
 
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(LessTest, Uint8QuantizedBroadcast)
+TEST_F(LessTest, Uint8QuantizedBroadcast)
 {
   std::vector<float> x_data{
     0.4,  -0.8, 0.7,  0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(LessTest, Uint8QuantizedBroadcast)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(LessTest, Input_Type_Mismatch_NEG)
+TEST_F(LessTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LessTest, Input_Output_Type_NEG)
+TEST_F(LessTest, Input_Output_Type_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Less kernel(&x_tensor, &y_tensor, &output_tensor);
index ed4b0f1..b6da1a2 100644 (file)
@@ -42,9 +42,9 @@ private:
 
 private:
   int32_t _x_multiplier = 0;
-  int32_t _x_shift = 0;
+  int _x_shift = 0;
   int32_t _y_multiplier = 0;
-  int32_t _y_shift = 0;
+  int _y_shift = 0;
 };
 
 } // namespace kernels
index db65815..0558003 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/LessEqual.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(LessEqualTest, FloatSimple)
+class LessEqualTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessEqualTest, FloatSimple)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(LessEqualTest, FloatSimple)
     false, true, true,  // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(LessEqualTest, FloatBroardcast)
+TEST_F(LessEqualTest, FloatBroardcast)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -74,12 +84,13 @@ TEST(LessEqualTest, FloatBroardcast)
     true,  true, false, // Row 3
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -90,7 +101,7 @@ TEST(LessEqualTest, FloatBroardcast)
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
 
-TEST(LessEqualTest, Uint8Quantized)
+TEST_F(LessEqualTest, Uint8Quantized)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -108,21 +119,22 @@ TEST(LessEqualTest, Uint8Quantized)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(LessEqualTest, Uint8QuantizedRescale)
+TEST_F(LessEqualTest, Uint8QuantizedRescale)
 {
   std::vector<float> x_data{
     0.5, 0.6, 0.7,  0.9, // Row 1
@@ -142,21 +154,22 @@ TEST(LessEqualTest, Uint8QuantizedRescale)
   std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
   std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
 
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(LessEqualTest, Uint8QuantizedBroadcast)
+TEST_F(LessEqualTest, Uint8QuantizedBroadcast)
 {
   std::vector<float> x_data{
     0.4,  -0.8, 0.7,  0.3, // Row 1
@@ -175,34 +188,35 @@ TEST(LessEqualTest, Uint8QuantizedBroadcast)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(LessEqualTest, Input_Type_Mismatch_NEG)
+TEST_F(LessEqualTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LessEqualTest, Input_Output_Type_NEG)
+TEST_F(LessEqualTest, Input_Output_Type_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
index fd2ec41..a2bf442 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALLocalResponseNormalization.h"
 
 #include <stdexcept>
 
@@ -52,7 +52,7 @@ void LocalResponseNormalization::execute() const
       op_params.bias = params().bias;
       op_params.alpha = params().alpha;
       op_params.beta = params().beta;
-      tflite::optimized_ops::LocalResponseNormalization(
+      luci_interpreter_pal::LocalResponseNormalization(
         op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
         getTensorData<float>(output()));
       break;
index 6a4331d..4a9d473 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/LocalResponseNormalization.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,10 +28,18 @@ namespace
 
 using namespace testing;
 
-TEST(LocalResponseNormalizationTest, SameAsL2Norm)
+class LocalResponseNormalizationTest : public ::testing::Test
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LocalResponseNormalizationParams params{};
@@ -41,16 +50,17 @@ TEST(LocalResponseNormalizationTest, SameAsL2Norm)
 
   LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
               FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
 }
 
-TEST(LocalResponseNormalizationTest, WithAlpha)
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LocalResponseNormalizationParams params{};
@@ -61,16 +71,17 @@ TEST(LocalResponseNormalizationTest, WithAlpha)
 
   LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
               FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
 }
 
-TEST(LocalResponseNormalizationTest, WithBias)
+TEST_F(LocalResponseNormalizationTest, WithBias)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LocalResponseNormalizationParams params{};
@@ -81,16 +92,17 @@ TEST(LocalResponseNormalizationTest, WithBias)
 
   LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
               FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
 }
 
-TEST(LocalResponseNormalizationTest, SmallRadius)
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LocalResponseNormalizationParams params{};
@@ -101,16 +113,17 @@ TEST(LocalResponseNormalizationTest, SmallRadius)
 
   LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
               FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
 }
 
-TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LocalResponseNormalizationParams params{};
@@ -123,10 +136,10 @@ TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   LocalResponseNormalizationParams params{};
index 03d13e4..79c3153 100644 (file)
@@ -18,9 +18,9 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALLogSoftmax.h"
 
 namespace luci_interpreter
 {
@@ -41,8 +41,7 @@ void LogSoftmax::configure()
 
     params.table = _table;
     params.beta = 1.0;
-
-    tflite::optimized_ops::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+    luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
   }
   output()->resize(input()->shape());
 }
@@ -76,6 +75,7 @@ void LogSoftmax::evalQuantized() const
   const auto input_scale = input()->scale();
   uint8_t *output_data = getTensorData<uint8_t>(output());
   const uint8_t *input_data = getTensorData<uint8_t>(input());
+  const float beta = 1.0;
 
   tflite::SoftmaxParams params{};
 
@@ -83,8 +83,9 @@ void LogSoftmax::evalQuantized() const
   params.zero_point = output()->zero_point();
   params.scale = output()->scale();
 
-  tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
-                                    output_data);
+  luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+  luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+                                   output_data);
 }
 
 } // namespace kernels
index 8a90c1d..50dcd5c 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/LogSoftmax.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,18 +28,28 @@ namespace
 
 using namespace testing;
 
-TEST(LogSoftmaxTest, Float)
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
 {
   Shape input_shape{2, 4};
   std::vector<float> input_data{
     0, -6, 2,  4, //
     3, -2, 10, 1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   LogSoftmax kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -48,7 +59,7 @@ TEST(LogSoftmaxTest, Float)
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(LogSoftmaxTest, Uint8)
+TEST_F(LogSoftmaxTest, Uint8)
 {
   float kMin = -10;
   float kMax = 10;
@@ -58,12 +69,13 @@ TEST(LogSoftmaxTest, Uint8)
     0, -6, 2,  4, //
     3, -2, 10, 1, //
   };
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+                                                      input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
 
   LogSoftmax kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -78,28 +90,29 @@ TEST(LogSoftmaxTest, Uint8)
               ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
 }
 
-TEST(LogSoftmaxTest, InvalidInputOutputType_NEG)
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
 {
   std::vector<float> input_data{
     0, -6, 2,  4, //
     3, -2, 10, 1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 4}, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
 
   LogSoftmax kernel(&input_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
 {
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
   std::vector<float> input_data{
     0, -6, 2,  4, //
     3, -2, 10, 1, //
   };
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+                                                      input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
 
   LogSoftmax kernel(&input_tensor, &output_tensor);
index 564f191..21b7951 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/LogicalAnd.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,15 +28,26 @@ namespace
 
 using namespace testing;
 
-TEST(LogicalAndTest, Basic)
+class LogicalAndTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalAndTest, Basic)
 {
   Shape input_shape{1, 1, 1, 4};
-  Tensor input_tensor1 = makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true});
-  Tensor input_tensor2 = makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false});
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -43,14 +55,17 @@ TEST(LogicalAndTest, Basic)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
 }
 
-TEST(LogicalAndTest, Broadcast)
+TEST_F(LogicalAndTest, Broadcast)
 {
-  Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
-  Tensor input_tensor2 = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true});
+  Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+                                                         _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -58,20 +73,23 @@ TEST(LogicalAndTest, Broadcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
 }
 
-TEST(LogicalAndTest, MismatchInputType_NEG)
+TEST_F(LogicalAndTest, MismatchInputType_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
-  Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false});
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
   LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LogicalAndTest, InputTypeInvalid_NEG)
+TEST_F(LogicalAndTest, InputTypeInvalid_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
-  Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0});
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
index dccb811..3cbf27f 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/LogicalNot.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,14 +28,24 @@ namespace
 
 using namespace testing;
 
-TEST(LogicalNotTest, Basic)
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
 {
   Shape input_shape{1, 1, 1, 4};
-  Tensor input_tensor = makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true});
+  Tensor input_tensor =
+    makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalNot kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -42,18 +53,20 @@ TEST(LogicalNotTest, Basic)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
 }
 
-TEST(LogicalNotTest, OutputTypeInvalid_NEG)
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
+  Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+                                                        _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
   LogicalNot kernel(&input_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LogicalNotTest, InputTypeInvalid_NEG)
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
+  Tensor input_tensor =
+    makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalNot kernel(&input_tensor, &output_tensor);
index 7027a2a..f289ca6 100644 (file)
@@ -20,8 +20,6 @@
 #include "kernels/Utils.h"
 #include "kernels/BinaryOpCommon.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-
 namespace luci_interpreter
 {
 namespace kernels
index 677eac9..d65a69a 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/LogicalOr.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,15 +28,26 @@ namespace
 
 using namespace testing;
 
-TEST(LogicalOrTest, Basic)
+class LogicalOrTest : public ::testing::Test
 {
-  Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
-  Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false});
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalOrTest, Basic)
+{
+  Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+                                                         _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false},
+                                                         _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -43,15 +55,18 @@ TEST(LogicalOrTest, Basic)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
 }
 
-TEST(LogicalOrTest, Broadcast)
+TEST_F(LogicalOrTest, Broadcast)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true});
-  Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false});
+  Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+                                                         _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor),
@@ -59,10 +74,12 @@ TEST(LogicalOrTest, Broadcast)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
 }
 
-TEST(LogicalOrTest, MismatchInputType_NEG)
+TEST_F(LogicalOrTest, MismatchInputType_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
-  Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false});
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
@@ -70,10 +87,11 @@ TEST(LogicalOrTest, MismatchInputType_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(LogicalOrTest, InputTypeInvalid_NEG)
+TEST_F(LogicalOrTest, InputTypeInvalid_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1});
-  Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0});
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
index 97d7bf1..58e4f18 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
 
 namespace luci_interpreter
 {
index 41369a4..7022756 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Logistic.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -30,11 +31,15 @@ template <typename T>
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<getElementType<T>()>(input_shape, input_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(getElementType<T>());
 
   Logistic kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -47,14 +52,18 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
                     std::initializer_list<float> input_data,
                     std::initializer_list<float> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::pair<float, int32_t> input_quant_param =
     quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
 
   Logistic kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -107,9 +116,12 @@ TYPED_TEST(LogisticTest, Simple)
 
 TEST(LogisticTest, IvalidInputOutputType_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   Shape input_shape = {1};
   std::vector<float> input_data{10};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
 
   Logistic kernel(&input_tensor, &output_tensor);
@@ -118,11 +130,13 @@ TEST(LogisticTest, IvalidInputOutputType_NEG)
 
 TEST(LogisticTest, IvalidQuantParam_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   Shape input_shape = {2};
   std::vector<float> input_data{-10, 10};
   std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
 
   Logistic kernel(&input_tensor, &output_tensor);
index b9991f7..44f2a22 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/MaxPool2D.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,7 +27,15 @@ namespace
 
 using namespace testing;
 
-TEST(MaxPool2DTest, Float)
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
 {
   Shape input_shape{1, 3, 5, 1};
   std::vector<float> input_data{
@@ -34,7 +43,8 @@ TEST(MaxPool2DTest, Float)
     -7, -6, -5, -4, -3, //
     5,  4,  3,  6,  7,  //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pool2DParams params{};
@@ -47,6 +57,7 @@ TEST(MaxPool2DTest, Float)
 
   MaxPool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -58,15 +69,15 @@ TEST(MaxPool2DTest, Float)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MaxPool2DTest, Uint8)
+TEST_F(MaxPool2DTest, Uint8)
 {
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
   std::vector<float> input_data{
     0,  -6, 12, 4, //
     -3, -2, 10, 7, //
   };
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   Pool2DParams params{};
@@ -79,6 +90,7 @@ TEST(MaxPool2DTest, Uint8)
 
   MaxPool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0.0, 6.0};
@@ -87,7 +99,7 @@ TEST(MaxPool2DTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MaxPool2DTest, SInt16)
+TEST_F(MaxPool2DTest, SInt16)
 {
   Shape input_shape{1, 3, 5, 1};
   std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
@@ -101,7 +113,8 @@ TEST(MaxPool2DTest, SInt16)
     5, 6, //
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
 
   Pool2DParams params{};
@@ -114,6 +127,7 @@ TEST(MaxPool2DTest, SInt16)
 
   MaxPool2D kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
index 2ddaeaf..e4a505b 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Maximum.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,34 +28,48 @@ namespace
 
 using namespace testing;
 
-TEST(MaximumTest, Float)
+class MaximumTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaximumTest, Float)
 {
   Shape input_shape{3, 1, 2};
   std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
   std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
-  Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
-  Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(MaximumTest, Uint8)
+TEST_F(MaximumTest, Uint8)
 {
   Shape input_shape{3, 1, 2};
   std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
   std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
-  Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1);
-  Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2);
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<int32_t> ref_output_shape{2, 4};
index 4216328..8e65e0d 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
 
 #include <stdexcept>
 
@@ -28,7 +28,7 @@ namespace luci_interpreter
 namespace kernels
 {
 
-static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params)
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
 {
   params->axis_count = num_axes;
   for (int i = 0; i < num_axes; ++i)
@@ -42,7 +42,7 @@ static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *
 }
 
 // Returns the number of axes that will be reduced. Removes duplicates.
-static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims)
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
 {
   int reduction_count = num_axes;
   for (int i = 0; i < num_axes; ++i)
@@ -63,7 +63,7 @@ static int getAxisReductionCount(const int *axes_data, int num_axes, int input_n
   return reduction_count;
 }
 
-static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes,
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
                             bool keep_dims)
 {
   int input_num_dims = input_shape.num_dims();
@@ -123,8 +123,10 @@ static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int
   }
 }
 
-Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
-  : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+           Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+  : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+                                    params)
 {
 }
 
@@ -149,17 +151,28 @@ void Mean::configure()
 
   tflite::MeanParams params{};
   resolveAxes(axes_data, num_axes, &params);
-  const bool need_temporaries = !(
+  _need_temporaries = !(
     _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
     ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
-  if (need_temporaries)
+  if (_need_temporaries)
   {
-    _temp_index =
-      std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
-    _resolved_axes =
-      std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
-    _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
-                                         AffineQuantization{}, "");
+    auto temp_index = getOutputTensors()[1];
+    auto resolved_axes = getOutputTensors()[2];
+    auto temp_sum = getOutputTensors()[3];
+
+    temp_index->resize(Shape(input_num_dims));
+    resolved_axes->resize(Shape(num_axes));
+    temp_sum->resize(output()->shape());
+  }
+  else
+  {
+    auto temp_index = getOutputTensors()[1];
+    auto resolved_axes = getOutputTensors()[2];
+    auto temp_sum = getOutputTensors()[3];
+
+    temp_index->set_allocatable(false);
+    resolved_axes->set_allocatable(false);
+    temp_sum->set_allocatable(false);
   }
 }
 
@@ -179,12 +192,6 @@ void Mean::execute() const
     default:
       throw std::runtime_error("Unsupported type.");
   }
-  if (!!_temp_index)
-    _temp_index->deallocate();
-  if (!!_resolved_axes)
-    _resolved_axes->deallocate();
-  if (!!_temp_sum)
-    _temp_sum->deallocate();
 }
 
 void Mean::evalFloat() const
@@ -197,6 +204,10 @@ void Mean::evalFloat() const
   tflite::MeanParams params{};
   resolveAxes(axes_data, num_axes, &params);
 
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
   // Defer to specialized implementation for 4D Mean across axes 1 & 2.
   if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
       ((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -207,12 +218,12 @@ void Mean::evalFloat() const
   }
   else
   {
-    tflite::reference_ops::Mean(
-      getTensorData<float>(input()), getTensorShape(input()).DimsData(),
-      input()->shape().num_dims(), getTensorData<float>(output()),
-      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
-      _params.keep_dims, getTensorData<int>(_temp_index.get()),
-      getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+    tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<float>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<float>(temp_sum));
   }
 }
 
@@ -226,6 +237,10 @@ void Mean::evalQuantized() const
   tflite::MeanParams params{};
   resolveAxes(axes_data, num_axes, &params);
 
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
   // Defer to specialized implementation for 4D Mean across axes 1 & 2.
   if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
       ((params.axis[0] == 1 && params.axis[1] == 2) ||
@@ -238,12 +253,12 @@ void Mean::evalQuantized() const
   }
   else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
   {
-    tflite::reference_ops::Mean(
-      getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
-      input()->shape().num_dims(), getTensorData<uint8_t>(output()),
-      getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
-      _params.keep_dims, getTensorData<int>(_temp_index.get()),
-      getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+    tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+                                input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+                                getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+                                axes_data, num_axes, _params.keep_dims,
+                                getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+                                getTensorData<int>(temp_sum));
   }
   else
   {
@@ -252,8 +267,8 @@ void Mean::evalQuantized() const
       getTensorShape(input()).DimsData(), input()->shape().num_dims(),
       getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
       getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
-      _params.keep_dims, getTensorData<int>(_temp_index.get()),
-      getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
+      _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+      getTensorData<int>(temp_sum),
       /*compute_sum=*/false);
   }
 }
index 1cc0468..ed07ae5 100644 (file)
@@ -30,7 +30,8 @@ namespace kernels
 class Mean : public KernelWithParams<ReducerParams>
 {
 public:
-  Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params);
+  Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+       Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);
 
   const Tensor *input() const { return _inputs[0]; }
   const Tensor *axes() const { return _inputs[1]; }
@@ -45,9 +46,7 @@ private:
   void evalQuantizedS16() const;
 
 private:
-  std::unique_ptr<Tensor> _temp_index;
-  std::unique_ptr<Tensor> _resolved_axes;
-  std::unique_ptr<Tensor> _temp_sum;
+  bool _need_temporaries = false;
 };
 
 } // namespace kernels
index fa0ba21..d2c0093 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Mean.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,22 +28,39 @@ namespace
 
 using namespace testing;
 
-TEST(MeanTest, FloatKeepDims)
+class MeanTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MeanTest, FloatKeepDims)
 {
   std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                    9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                    17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
 
   std::vector<int32_t> axis_data{0, 2};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ReducerParams params{};
   params.keep_dims = true;
 
-  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+              params);
   kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(temp_sum);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{10.5, 12.5, 14.5};
@@ -51,22 +69,31 @@ TEST(MeanTest, FloatKeepDims)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MeanTest, FloatKeepDims4DMean)
+TEST_F(MeanTest, FloatKeepDims4DMean)
 {
   std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                    9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                    17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
 
   std::vector<int32_t> axis_data{1, 2};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data);
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ReducerParams params{};
   params.keep_dims = true;
 
-  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+              params);
   kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(temp_sum);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{6, 7, 18, 19};
@@ -75,22 +102,31 @@ TEST(MeanTest, FloatKeepDims4DMean)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MeanTest, FloatNotKeepDims)
+TEST_F(MeanTest, FloatNotKeepDims)
 {
   std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                    9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
                                    17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
 
   std::vector<int32_t> axis_data{1, 0, -3, -3};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ReducerParams params{};
   params.keep_dims = false;
 
-  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+              params);
   kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(temp_sum);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{12, 13};
@@ -99,23 +135,31 @@ TEST(MeanTest, FloatNotKeepDims)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MeanTest, Uint8KeepDims)
+TEST_F(MeanTest, Uint8KeepDims)
 {
   float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
   std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
 
   std::vector<int32_t> axis_data{1};
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data);
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
+                                                      input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor temp_sum(DataType::U8, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   ReducerParams params{};
   params.keep_dims = true;
 
-  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+              params);
   kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(temp_sum);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0.3, 0.35, 0.55};
@@ -125,23 +169,31 @@ TEST(MeanTest, Uint8KeepDims)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MeanTest, Uint8NotKeepDims)
+TEST_F(MeanTest, Uint8NotKeepDims)
 {
   float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
   std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
 
   std::vector<int32_t> axis_data{1};
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data);
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   ReducerParams params{};
   params.keep_dims = false;
 
-  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+  Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+              params);
   kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(temp_sum);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0.4, 0.4};
@@ -151,7 +203,7 @@ TEST(MeanTest, Uint8NotKeepDims)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(MeanTest, SInt16KeepDims4D)
+TEST_F(MeanTest, SInt16KeepDims4D)
 {
   std::vector<float> input_data = {1.0,  2.0,  3.0,  4.0,  5.0,  6.0,  7.0,  8.0,
                                    9.0,  10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@@ -159,15 +211,24 @@ TEST(MeanTest, SInt16KeepDims4D)
   std::vector<int32_t> axes_data{1, 2};
   std::vector<float> ref_output_data{6, 7, 18, 19};
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data);
-  Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
+  Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
+  Tensor temp_index(DataType::S32, Shape({}), {}, "");
+  Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+  Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
 
   ReducerParams params{};
   params.keep_dims = true;
 
-  Mean kernel(&input_tensor, &axes_tensor, &output_tensor, params);
+  Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+              params);
   kernel.configure();
+  _memory_manager->allocate_memory(temp_index);
+  _memory_manager->allocate_memory(resolved_axes);
+  _memory_manager->allocate_memory(temp_sum);
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
index b6420dd..9a14364 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Minimum.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,34 +28,48 @@ namespace
 
 using namespace testing;
 
-TEST(MinimumTest, Float)
+class MinimumTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
 {
   Shape input_shape{3, 1, 2};
   std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
   std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
-  Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
-  Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(MinimumTest, Uint8)
+TEST_F(MinimumTest, Uint8)
 {
   Shape input_shape{3, 1, 2};
   std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
   std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
-  Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1);
-  Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2);
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<int32_t> ref_output_shape{2, 4};
index 1139167..89049c9 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
 
 namespace luci_interpreter
 {
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644 (file)
index 0000000..de9da50
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: Add tests for MirrorPad
index 4e6e3f7..bc855de 100644 (file)
@@ -20,7 +20,9 @@
 #include "kernels/BinaryOpCommon.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
 
 #include <stdexcept>
 
@@ -77,15 +79,15 @@ void Mul::evalFloat() const
 
   if (need_broadcast)
   {
-    tflite::optimized_ops::BroadcastMul4DSlow(
+    luci_interpreter_pal::BroadcastMul4DSlow(
       params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
       getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
   }
   else
   {
-    tflite::optimized_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                               getTensorShape(input2()), getTensorData<float>(input2()),
-                               getTensorShape(output()), getTensorData<float>(output()));
+    luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                              getTensorShape(input2()), getTensorData<float>(input2()),
+                              getTensorShape(output()), getTensorData<float>(output()));
   }
 }
 
index fc7ffb5..471f6ac 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Mul.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(MulTest, Float)
+class MulTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MulTest, Float)
 {
   Shape base_shape = {2, 3, 1, 2};
   std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -45,8 +54,10 @@ TEST(MulTest, Float)
   std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
-    Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+    Tensor input1_tensor =
+      makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+    Tensor input2_tensor =
+      makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
     MulParams params{};
@@ -54,6 +65,7 @@ TEST(MulTest, Float)
 
     Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -62,8 +74,10 @@ TEST(MulTest, Float)
   // Re-run with exchanged inputs.
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
-    Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+    Tensor input1_tensor =
+      makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+    Tensor input2_tensor =
+      makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
     MulParams params{};
@@ -71,6 +85,7 @@ TEST(MulTest, Float)
 
     Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -78,7 +93,7 @@ TEST(MulTest, Float)
   }
 }
 
-TEST(MulTest, SInt16)
+TEST_F(MulTest, SInt16)
 {
   Shape base_shape = {2, 3, 1, 2};
   std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -99,9 +114,10 @@ TEST(MulTest, SInt16)
     {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data);
-    Tensor input2_tensor =
-      makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data);
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
     const float tolerance = output_tensor.scale() * 2;
 
@@ -110,6 +126,7 @@ TEST(MulTest, SInt16)
 
     Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorShape(output_tensor),
@@ -121,9 +138,10 @@ TEST(MulTest, SInt16)
   // Re-run with exchanged inputs and different scales.
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor =
-      makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data);
-    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data);
+    Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+                                                          input2_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+                                                          _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
     const float tolerance = output_tensor.scale() * 2;
 
@@ -132,6 +150,7 @@ TEST(MulTest, SInt16)
 
     Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorShape(output_tensor),
index 99f4d4a..c6fe08a 100644 (file)
@@ -17,7 +17,7 @@
 #include "kernels/Neg.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALNeg.h"
 
 #include <stdexcept>
 
@@ -50,8 +50,8 @@ void Neg::execute() const
 
 void Neg::evalFloat() const
 {
-  tflite::reference_ops::Negate(getTensorShape(input()), getTensorData<float>(input()),
-                                getTensorShape(output()), getTensorData<float>(output()));
+  luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+                               getTensorShape(output()), getTensorData<float>(output()));
 }
 
 } // namespace kernels
index 33256e1..8b2bc1a 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Neg.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -31,13 +32,16 @@ template <typename T>
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<T> input_data, std::initializer_list<T> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   Neg kernel(&input_tensor, &output_tensor);
 
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
index d729c6c..247874d 100644 (file)
@@ -42,9 +42,9 @@ private:
 
 private:
   int32_t _x_multiplier = 0;
-  int32_t _x_shift = 0;
+  int _x_shift = 0;
   int32_t _y_multiplier = 0;
-  int32_t _y_shift = 0;
+  int _y_shift = 0;
 };
 
 } // namespace kernels
index f9dc778..763f868 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/NotEqual.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(NotEqualTest, FloatSimple)
+class NotEqualTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(NotEqualTest, FloatSimple)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -44,19 +53,20 @@ TEST(NotEqualTest, FloatSimple)
     true, false, true, // Row 2
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(NotEqualTest, FloatBroardcast)
+TEST_F(NotEqualTest, FloatBroadcast)
 {
   std::vector<float> x_data{
     0.5, 0.7, 0.9, // Row 1
@@ -76,12 +86,13 @@ TEST(NotEqualTest, FloatBroardcast)
     false, false, false, // Row 4
   };
 
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data);
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
@@ -92,7 +103,7 @@ TEST(NotEqualTest, FloatBroardcast)
 const float F_MIN = -128.0 / 128.0;
 const float F_MAX = 127.0 / 128.0;
 
-TEST(NotEqualTest, Uint8Quantized)
+TEST_F(NotEqualTest, Uint8Quantized)
 {
   std::vector<float> x_data{
     0.5, 0.5, 0.7,  0.9, // Row 1
@@ -110,24 +121,25 @@ TEST(NotEqualTest, Uint8Quantized)
   };
 
   std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
 
   std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data);
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(NotEqualTest, Uint8QuantizedBroadcast)
+TEST_F(NotEqualTest, Uint8QuantizedBroadcast)
 {
   std::vector<float> x_data{
     0.4,  -0.8, 0.7,  0.3, // Row 1
@@ -148,34 +160,35 @@ TEST(NotEqualTest, Uint8QuantizedBroadcast)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
-  Tensor x_tensor =
-    makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data);
-  Tensor y_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data);
+  Tensor x_tensor = makeInputTensor<DataType::U8>(
+    {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
   EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
 }
 
-TEST(NotEqualTest, Input_Type_Mismatch_NEG)
+TEST_F(NotEqualTest, Input_Type_Mismatch_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(NotEqualTest, Input_Output_Type_NEG)
+TEST_F(NotEqualTest, Input_Output_Type_NEG)
 {
-  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
index a53ac6f..5a6b05c 100644 (file)
@@ -19,7 +19,8 @@
 #include "kernels/BinaryOpCommon.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
 
 #include <stdexcept>
 
@@ -168,10 +169,11 @@ static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
   constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
 
   const int32_t output_val =
-    input_val >= 0 ? tflite::MultiplyByQuantizedMultiplier(input_val, identity_mult.multiplier,
-                                                           identity_mult.shift)
-                   : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val,
-                                                           alpha_mult.multiplier, alpha_mult.shift);
+    input_val >= 0
+      ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+                                              identity_mult.multiplier, identity_mult.shift)
+      : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+                                              alpha_mult.multiplier, alpha_mult.shift);
   const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
   return clamped_output;
 }
index e85c3f7..f7735d4 100644 (file)
@@ -50,7 +50,7 @@ private:
   std::vector<ChannelQuantMultipliers> _alpha_multipliers;
   // TODO merge this into one ChannelQuantMultiplier object
   int32_t _output_multiplier_identity = 0;
-  int32_t _output_shift_identity = 0;
+  int _output_shift_identity = 0;
 };
 
 } // namespace kernels
index 3dbc51c..6d97382 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/PRelu.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -32,14 +33,18 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
            std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
-  Tensor alpha_tensor = makeInputTensor<element_type>(alpha_shape, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor alpha_tensor =
+    makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
 
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -97,6 +102,7 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
 
 TEST(PReluTest, Uint8Simple)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
   std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
   std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
@@ -104,14 +110,15 @@ TEST(PReluTest, Uint8Simple)
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
 
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
-  Tensor alpha_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -147,14 +154,16 @@ TEST(PReluTest, Uint8Broadcast)
   const float kMax = 127.f / 128.f;
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
 
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first, quant_param.second, input_data);
-  Tensor alpha_tensor =
-    makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+    {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -166,12 +175,15 @@ TEST(PReluTest, Uint8Broadcast)
 
 TEST(PReluTest, SInt16_LWQ_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   // Rewrite this test in case layer-wise quantization for sint16 is supported
   std::vector<float> input_data(6); // data is not important
   std::vector<float> alpha_data(6);
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data);
-  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
+  Tensor alpha_tensor =
+    makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -180,18 +192,22 @@ TEST(PReluTest, SInt16_LWQ_NEG)
 
 TEST(PReluTest, SInt16_CWQ_Simple)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
   std::vector<float> alpha_data{0.5f, 0.25f};
   std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
 
   std::vector<float> alpha_scales{0.05f, 0.025f};
   std::vector<int32_t> zerop{0, 0};
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
-  Tensor alpha_tensor = makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+  Tensor alpha_tensor =
+    makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
@@ -200,14 +216,16 @@ TEST(PReluTest, SInt16_CWQ_Simple)
 
 TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data(6); // data is not important
   std::vector<float> alpha_data(6);
 
   std::vector<float> alpha_scales{0.25f, 0.05f};
   std::vector<int32_t> zerop{0, 0};
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
-  Tensor alpha_tensor =
-    makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
+                                                       alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -216,14 +234,16 @@ TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
 
 TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data(6); // data is not important
   std::vector<float> alpha_data(6);
 
   std::vector<float> alpha_scales{0.25f};
   std::vector<int32_t> zerop{0};
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
-  Tensor alpha_tensor =
-    makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
+                                                       alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -232,19 +252,22 @@ TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
 
 TEST(PReluTest, SInt16_CWQ_uneven_shape1)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
   std::vector<float> alpha_data{0.5f, 0.25f};
   std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
 
   std::vector<float> alpha_scales{0.05f, 0.025f};
   std::vector<int32_t> zerop{0, 0};
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data);
-  Tensor alpha_tensor =
-    makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
+                                                       alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
@@ -253,6 +276,7 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape1)
 
 TEST(PReluTest, SInt16_CWQ_uneven_shape2)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{
     0.0f,   0.0f,   0.0f,   // Row 1, Column 1
     0.5f,   0.5f,   0.5f,   // Row 1, Column 2
@@ -269,13 +293,15 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape2)
 
   std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
   std::vector<int32_t> zerop{0, 0, 0};
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data);
-  Tensor alpha_tensor =
-    makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3, alpha_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
+                                                       alpha_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
@@ -284,8 +310,9 @@ TEST(PReluTest, SInt16_CWQ_uneven_shape2)
 
 TEST(PReluTest, Input_Output_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -294,8 +321,9 @@ TEST(PReluTest, Input_Output_Type_NEG)
 
 TEST(PReluTest, Input_Alpha_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
@@ -304,23 +332,29 @@ TEST(PReluTest, Input_Alpha_Type_NEG)
 
 TEST(PReluTest, Invalid_Input_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
-  Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+  Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
 TEST(PReluTest, Input_Output_U8_CWQ_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> scales{1.f, 1.f};
   std::vector<int32_t> zerop{0, 0};
   std::vector<float> dummy_data(4, 0.f);
-  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
-  Tensor alpha_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
-  Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+  Tensor alpha_tensor =
+    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+  Tensor output_tensor =
+    makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
@@ -328,12 +362,16 @@ TEST(PReluTest, Input_Output_U8_CWQ_NEG)
 
 TEST(PReluTest, Input_Output_S16_CWQ_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> scales{1.f, 1.f};
   std::vector<int32_t> zerop{0, 0};
   std::vector<float> dummy_data(4, 0.f);
-  Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
-  Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
-  Tensor output_tensor = makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+  Tensor alpha_tensor =
+    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+  Tensor output_tensor =
+    makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
@@ -341,10 +379,14 @@ TEST(PReluTest, Input_Output_S16_CWQ_NEG)
 
 TEST(PReluTest, Mixing_U8_S16_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> dummy_data(4, 0.f);
-  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
-  Tensor alpha_tensor = makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data);
-  Tensor output_tensor = makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+  Tensor alpha_tensor =
+    makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+  Tensor output_tensor =
+    makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
 
   PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
index 092bd44..90a0f89 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Pack.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -31,6 +32,7 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
            std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
            std::initializer_list<T> output_data, int32_t axis)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
   std::vector<const Tensor *> inputs(input_datas.size());
   std::vector<Tensor> tmp_inputs;
@@ -39,11 +41,13 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
     if (std::is_same<T, float>::value)
     {
       tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+      memory_manager->allocate_memory(tmp_inputs[i]);
       tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
     }
     else
     {
       tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+      memory_manager->allocate_memory(tmp_inputs[i]);
       tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
     }
   }
@@ -64,6 +68,7 @@ void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
   Pack kernel(inputs, &output_tensor, params);
 
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -103,12 +108,13 @@ TYPED_TEST(PackTest, NegAxis)
 
 TEST(Pack, MismatchingInputValuesCount_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input1_data{1, 4};
   std::vector<float> input2_data{2, 5};
   std::vector<float> input3_data{3, 6};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
-  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   PackParams params{};
   {
@@ -122,12 +128,13 @@ TEST(Pack, MismatchingInputValuesCount_NEG)
 
 TEST(Pack, InvalidInputAxis_NEG)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input1_data{1, 4};
   std::vector<float> input2_data{2, 5};
   std::vector<float> input3_data{3, 6};
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data);
-  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data);
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+  Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
   PackParams params{};
   {
index 3e76080..700448e 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
 
 namespace luci_interpreter
 {
index 75b2e56..7994263 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Pad.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -30,17 +31,20 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
 
 TEST(Pad, Uint8)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
   std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
   std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
@@ -52,14 +56,18 @@ TEST(Pad, Uint8)
 
 TEST(Pad, Float)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{1, 2, 3, 4, 5, 6};
   std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data);
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
index 3c215db..e904692 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
 
 namespace luci_interpreter
 {
index 1ee7414..41efaff 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/PadV2.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -30,20 +31,23 @@ float GetTolerance(float min, float max) { return (max - min) / 255.0; }
 
 TEST(PadV2, Uint8)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
   std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
   std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
   std::vector<float> constant_values_data{0.5};
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, quant_param.second, input_data);
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
-  Tensor constant_values =
-    makeInputTensor<DataType::U8>({1}, quant_param.first, quant_param.second, constant_values_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor constant_values = makeInputTensor<DataType::U8>(
+    {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data = {
@@ -56,16 +60,21 @@ TEST(PadV2, Uint8)
 
 TEST(PadV2, Float)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   std::vector<float> input_data{1, 2, 3, 4, 5, 6};
   std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
   std::vector<float> constant_values_data{7};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data);
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
-  Tensor constant_values = makeInputTensor<DataType::FLOAT32>({1}, constant_values_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+  Tensor constant_values =
+    makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
index a414440..0e85811 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Pow.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,7 +27,15 @@ namespace
 
 using namespace testing;
 
-TEST(PowTest, SimplePow)
+class PowTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
 {
   std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
 
@@ -34,19 +43,22 @@ TEST(PowTest, SimplePow)
   std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
   std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
 
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
 }
 
-TEST(PowTest, FloatBroadcastPow)
+TEST_F(PowTest, FloatBroadcastPow)
 {
   std::initializer_list<int32_t> input1_shape = {1, 3};
   std::initializer_list<int32_t> input2_shape = {3, 1};
@@ -56,60 +68,66 @@ TEST(PowTest, FloatBroadcastPow)
   std::vector<float> test_outputs{0.786f,   1.18126f, 0.9791f, 0.6968f, 1.28386f,
                                   0.96888f, 0.6178f,  1.3953f, 0.9587f};
 
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data);
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
 }
 
-TEST(PowTest, IntPow)
+TEST_F(PowTest, IntPow)
 {
   std::initializer_list<int32_t> base_shape = {1, 3};
 
   std::vector<int32_t> input_data{2, 3, 4};
   std::vector<int32_t> test_outputs{4, 27, 256};
 
-  Tensor input1_tensor = makeInputTensor<DataType::S32>(base_shape, input_data);
-  Tensor input2_tensor = makeInputTensor<DataType::S32>(base_shape, input_data);
+  Tensor input1_tensor =
+    makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+  Tensor input2_tensor =
+    makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
   Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
 }
 
-TEST(PowTest, Input_Output_Type_NEG)
+TEST_F(PowTest, Input_Output_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f});
-  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f});
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::BOOL);
 
   Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(PowTest, Input_Type_Mismatch_NEG)
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f});
-  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4});
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(PowTest, Invalid_Input_Type_NEG)
+TEST_F(PowTest, Invalid_Input_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
-  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
index b5acf1d..747ec6c 100644 (file)
@@ -17,7 +17,7 @@
 #include "kernels/Relu.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALRelu.h"
 
 #include <stdexcept>
 
@@ -70,7 +70,7 @@ void Relu::evalFloat() const
   auto output_data = getTensorData<float>(output());
   auto output_shape = getTensorShape(output());
 
-  tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+  luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
 }
 
 void Relu::evalQuantized() const
@@ -85,8 +85,8 @@ void Relu::evalQuantized() const
     std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
   params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
 
-  tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                              getTensorShape(output()), getTensorData<uint8_t>(output()));
 }
 
 void Relu::evalQuantizedS16() const
index 6623a5b..bd32e3c 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Relu.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(ReluTest, FloatSimple)
+class ReluTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReluTest, FloatSimple)
 {
   std::vector<float> input_data{
     0.0f, 1.0f,  3.0f,  // Row 1
@@ -39,11 +48,13 @@ TEST(ReluTest, FloatSimple)
     1.0f, 0.0f, 0.0f, // Row 2
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Relu kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -51,7 +62,7 @@ TEST(ReluTest, FloatSimple)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(ReluTest, Uint8Quantized)
+TEST_F(ReluTest, Uint8Quantized)
 {
   std::vector<float> input_data{
     0, -6, 2, 4, //
@@ -62,12 +73,13 @@ TEST(ReluTest, Uint8Quantized)
   const float f_max = (127.0 / 128.0) * 8;
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   Relu kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -76,7 +88,7 @@ TEST(ReluTest, Uint8Quantized)
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
 }
 
-TEST(ReluTest, Uint8Requantized)
+TEST_F(ReluTest, Uint8Requantized)
 {
   std::vector<float> input_data{
     0, -6, 2, 4, //
@@ -90,14 +102,15 @@ TEST(ReluTest, Uint8Requantized)
   const float out_max = (255.0 / 256.0) * 8;
 
   std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
 
   std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
 
   Relu kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -106,7 +119,7 @@ TEST(ReluTest, Uint8Requantized)
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
 }
 
-TEST(ReluTest, SInt16)
+TEST_F(ReluTest, SInt16)
 {
   std::vector<float> input_data{
     0, -6, 2, 4, //
@@ -117,33 +130,36 @@ TEST(ReluTest, SInt16)
     3, 0, 7, 1, //
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0);
 
   Relu kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
   EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(ReluTest, Input_Output_Type_NEG)
+TEST_F(ReluTest, Input_Output_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Relu kernel(&input_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(ReluTest, Invalid_Input_Type_NEG)
+TEST_F(ReluTest, Invalid_Input_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   Relu kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index fa7aa50..07205ed 100644 (file)
@@ -17,7 +17,7 @@
 #include "kernels/Relu6.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALRelu6.h"
 
 #include <stdexcept>
 
@@ -63,7 +63,7 @@ void Relu6::evalFloat() const
   auto output_data = getTensorData<float>(output());
   auto output_shape = getTensorShape(output());
 
-  tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+  luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
 }
 
 void Relu6::evalQuantized() const
@@ -80,8 +80,8 @@ void Relu6::evalQuantized() const
     std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
              params.output_offset + static_cast<int32>(roundf(6.f / output()->scale())));
 
-  tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
-                               getTensorShape(output()), getTensorData<uint8_t>(output()));
+  luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+                              getTensorShape(output()), getTensorData<uint8_t>(output()));
 }
 
 } // namespace kernels
index fe99138..af7b3f3 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Relu6.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,7 +28,15 @@ namespace
 
 using namespace testing;
 
-TEST(Relu6Test, FloatSimple)
+class Relu6Test : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Relu6Test, FloatSimple)
 {
   std::vector<float> input_data{
     0.0f, 1.0f,  3.0f,  // Row 1
@@ -39,11 +48,13 @@ TEST(Relu6Test, FloatSimple)
     6.0f, 0.0f, 0.0f, // Row 2
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Relu6 kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor),
@@ -51,7 +62,7 @@ TEST(Relu6Test, FloatSimple)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
 }
 
-TEST(Relu6Test, Uint8Quantized)
+TEST_F(Relu6Test, Uint8Quantized)
 {
   // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
   const float f_min = (-128.0 / 128.0) * 10;
@@ -64,12 +75,13 @@ TEST(Relu6Test, Uint8Quantized)
   };
 
   std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
 
   Relu6 kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -79,7 +91,7 @@ TEST(Relu6Test, Uint8Quantized)
               FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
 }
 
-TEST(Relu6Test, Uint8Requantized)
+TEST_F(Relu6Test, Uint8Requantized)
 {
   // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
   const float in_min = (-128.0 / 128.0) * 10;
@@ -94,14 +106,15 @@ TEST(Relu6Test, Uint8Requantized)
   };
 
   std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, quant_input.second, input_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
 
   std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
 
   Relu6 kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
@@ -111,22 +124,23 @@ TEST(Relu6Test, Uint8Requantized)
               FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
 }
 
-TEST(Relu6Test, Input_Output_Type_NEG)
+TEST_F(Relu6Test, Input_Output_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Relu6 kernel(&input_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(Relu6Test, Invalid_Input_Type_NEG)
+TEST_F(Relu6Test, Invalid_Input_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   Relu6 kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 3815938..c2ff3ea 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Reshape.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,37 +27,51 @@ namespace
 
 using namespace testing;
 
+class ReshapeTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
 // TODO Test types other than FLOAT32.
 
-TEST(ReshapeTest, Regular)
+TEST_F(ReshapeTest, Regular)
 {
   Shape input_shape{1, 2, 2, 3};
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   Shape shape_shape{2};
   std::vector<int32_t> shape_data{3, 4};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor shape_tensor =
+    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
 }
 
-TEST(ReshapeTest, UnknownDimension)
+TEST_F(ReshapeTest, UnknownDimension)
 {
   Shape input_shape{2, 1, 2, 3};
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   Shape shape_shape{3};
   std::vector<int32_t> shape_data{2, -1, 2};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor shape_tensor =
+    makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
index 0e9bcc9..e2ddd6a 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALResizeBilinear.h"
 
 namespace luci_interpreter
 {
@@ -56,12 +56,12 @@ void ResizeBilinear::execute() const
   switch (output()->element_type())
   {
     case DataType::FLOAT32:
-      tflite::optimized_ops::ResizeBilinear(
+      luci_interpreter_pal::ResizeBilinear(
         op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
         getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
       break;
     case DataType::U8:
-      tflite::optimized_ops::ResizeBilinear(
+      luci_interpreter_pal::ResizeBilinear(
         op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
         getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
       break;
index 68ef6e6..7af20f8 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/ResizeBilinear.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -33,8 +34,10 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
            bool align_corners, bool half_pixel_centers)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeBilinearParams params{};
@@ -43,6 +46,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
 
   ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -60,8 +64,11 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
 {
   // On TFlite example use Uint8 value it self, so this means quant param scale 1.0f and zero
   // point 0.
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data);
-  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
 
   ResizeBilinearParams params{};
@@ -70,6 +77,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
 
   ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -152,13 +160,17 @@ TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
 
 TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
-                                                                        3, 6,  //
-                                                                        9, 12, //
-                                                                        4, 10, //
-                                                                        10, 16 //
-                                                                      });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeBilinearParams params{};
@@ -171,13 +183,17 @@ TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
 
 TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
-                                                                           3, 6,  //
-                                                                           9, 12, //
-                                                                           4, 10, //
-                                                                           10, 16 //
-                                                                         });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeBilinearParams params{};
@@ -190,13 +206,17 @@ TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
 
 TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
-                                                                           3, 6,  //
-                                                                           9, 12, //
-                                                                           4, 10, //
-                                                                           10, 16 //
-                                                                         });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeBilinearParams params{};
@@ -209,13 +229,17 @@ TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
 
 TEST(ResizeBilinearTest, InvalidParams_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
-                                                                           3, 6,  //
-                                                                           9, 12, //
-                                                                           4, 10, //
-                                                                           10, 16 //
-                                                                         });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeBilinearParams params{};
index c522649..306cefb 100644 (file)
@@ -19,8 +19,8 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
 
 namespace luci_interpreter
 {
@@ -61,7 +61,7 @@ void ResizeNearestNeighbor::execute() const
         getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
       break;
     case DataType::U8:
-      tflite::optimized_ops::ResizeNearestNeighbor(
+      luci_interpreter_pal::ResizeNearestNeighbor(
         op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
         getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
       break;
index 0b36a29..0e9017c 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/ResizeNearestNeighbor.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -33,8 +34,11 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
            bool align_corners, bool half_pixel_centers)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeNearestNeighborParams params{};
@@ -43,6 +47,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
 
   ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -58,12 +63,14 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
                     std::initializer_list<float> output_data, bool align_corners,
                     bool half_pixel_centers)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::pair<float, int32_t> quant_param =
     quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
                                 std::max(input_data) > 0 ? std::max(input_data) : 0.f);
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data);
-  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.first);
 
   ResizeNearestNeighborParams params{};
@@ -72,6 +79,7 @@ void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
 
   ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -151,13 +159,17 @@ TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
 
 TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, {
-                                                                        3, 6,  //
-                                                                        9, 12, //
-                                                                        4, 10, //
-                                                                        10, 16 //
-                                                                      });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeNearestNeighborParams params{};
@@ -170,13 +182,17 @@ TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
 
 TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
-                                                                           3, 6,  //
-                                                                           9, 12, //
-                                                                           4, 10, //
-                                                                           10, 16 //
-                                                                         });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeNearestNeighborParams params{};
@@ -189,13 +205,17 @@ TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
 
 TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, {
-                                                                           3, 6,  //
-                                                                           9, 12, //
-                                                                           4, 10, //
-                                                                           10, 16 //
-                                                                         });
-  Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+                                                           {
+                                                             3, 6,  //
+                                                             9, 12, //
+                                                             4, 10, //
+                                                             10, 16 //
+                                                           },
+                                                           memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   ResizeNearestNeighborParams params{};
index 6e1e6c0..2bd9487 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/ReverseV2.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -36,6 +37,8 @@ TYPED_TEST_CASE(ReverseV2Test, DataTypes);
 
 TYPED_TEST(ReverseV2Test, MultiDimensions)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   // TypeParam
   std::vector<TypeParam> input_data{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
                                     13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
@@ -47,13 +50,15 @@ TYPED_TEST(ReverseV2Test, MultiDimensions)
                                      17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
   std::vector<int32_t> output_shape{4, 3, 2};
 
-  Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
-  Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data);
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
 
   ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
index b93a04d..3c64942 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Rsqrt.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -29,11 +30,15 @@ using namespace testing;
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Rsqrt kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -58,7 +63,9 @@ TEST(RsqrtTest, SimpleRsqrt)
 
 TEST(RsqrtTest, Input_Output_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
   Rsqrt kernel(&input_tensor, &output_tensor);
@@ -67,11 +74,14 @@ TEST(RsqrtTest, Input_Output_Type_NEG)
 
 TEST(RsqrtTest, Invalid_Input_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   Rsqrt kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 6265218..37a834a 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "kernels/Slice.h"
 #include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSlice.h"
 
 #include <cassert>
 #include <cstring>
@@ -131,14 +131,13 @@ void Slice::execute() const
   switch (input()->element_type())
   {
     case DataType::FLOAT32:
-      tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
-                                   getTensorData<float>(input()), getTensorShape(output()),
-                                   getTensorData<float>(output()));
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+                                  getTensorShape(output()), getTensorData<float>(output()));
       break;
     case DataType::U8:
-      tflite::optimized_ops::Slice(op_params, getTensorShape(input()),
-                                   getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                   getTensorData<uint8_t>(output()));
+      luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+                                  getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                  getTensorData<uint8_t>(output()));
       break;
     default:
       throw std::runtime_error("Unsupported input type.");
index a360a29..3e0d0b0 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Slice.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -35,6 +36,8 @@ TYPED_TEST_CASE(SliceTest, DataTypes);
 
 TYPED_TEST(SliceTest, SimpleTest)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
   Shape input_shape{3, 2, 3, 1};
   std::vector<int32_t> begin_data{1, 0, 0, 0};
@@ -44,14 +47,17 @@ TYPED_TEST(SliceTest, SimpleTest)
   std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
   std::vector<int32_t> output_shape{2, 1, 3, 1};
 
-  Tensor input_tensor = makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data);
-  Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
-  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data);
+  Tensor input_tensor =
+    makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+  Tensor begin_tensor =
+    makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+  Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
 
   Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
 
   Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
index 8e29f53..c230aaa 100644 (file)
@@ -19,7 +19,7 @@
 #include "kernels/Utils.h"
 
 #include <tensorflow/lite/kernels/internal/reference/softmax.h>
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSoftmax.h"
 
 #include <stdexcept>
 
@@ -40,10 +40,12 @@ void Softmax::configure()
   LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
   if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
   {
-    LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
+    LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
+                           output()->zero_point() == std::numeric_limits<int8_t>::min());
     tflite::SoftmaxParams op_params{};
     op_params.table = _table;
-    tflite::optimized_ops::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
+    luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
   }
   output()->resize(input()->shape());
 }
@@ -81,9 +83,9 @@ template <typename T> void Softmax::evalQuantized() const
   op_params.table = const_cast<float *>(_table);
   op_params.zero_point = output()->zero_point();
   op_params.scale = output()->scale();
-
-  tflite::optimized_ops::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
-                                 getTensorShape(output()), getTensorData<T>(output()));
+  luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
+  luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
+                                getTensorShape(output()), getTensorData<T>(output()));
 }
 
 } // namespace kernels
index c69a2f9..9de40b6 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Softmax.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -26,46 +27,60 @@ namespace
 
 using namespace testing;
 
-template <typename T>
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
 
   SoftmaxParams params{};
   params.beta = 0.1;
 
   Softmax kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
   EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
 }
 
-template <>
-void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
-                    std::initializer_list<int32_t> output_shape,
-                    std::initializer_list<float> input_data,
-                    std::initializer_list<float> output_data)
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::pair<float, int32_t> input_quant_param =
-    quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f),
-                                std::max<float>(std::max<float>(input_data), 0.f));
+    quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+                          std::max<float>(std::max<float>(input_data), 0.f));
   std::pair<float, int32_t> output_quant_param =
-    quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f),
-                                std::max<float>(std::max<float>(output_data), 0.f));
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
+    quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+                          std::max<float>(std::max<float>(output_data), 0.f));
+  Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+                                                             input_quant_param.second, input_data,
+                                                             memory_manager.get());
   Tensor output_tensor =
-    makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+    makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
 
   SoftmaxParams params{};
   params.beta = 0.1;
 
   Softmax kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
@@ -77,7 +92,7 @@ template <typename T> class SoftmaxTest : public ::testing::Test
 {
 };
 
-using DataTypes = ::testing::Types<float, uint8_t>;
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
 TYPED_TEST_CASE(SoftmaxTest, DataTypes);
 
 TYPED_TEST(SoftmaxTest, Simple)
index 2f6a479..630cd38 100644 (file)
@@ -18,7 +18,7 @@
 #include "kernels/SpaceToBatchND.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSpaceToBatchND.h"
 
 #include <stdexcept>
 
@@ -80,7 +80,7 @@ void SpaceToBatchND::execute() const
     tflite::SpaceToBatchParams op_params;
     case DataType::FLOAT32:
       op_params.output_offset = 0;
-      tflite::optimized_ops::SpaceToBatchND(
+      luci_interpreter_pal::SpaceToBatchND(
         op_params, getTensorShape(input()), getTensorData<float>(input()),
         getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
         getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
@@ -88,7 +88,7 @@ void SpaceToBatchND::execute() const
       break;
     case DataType::U8:
       op_params.output_offset = output()->zero_point();
-      tflite::optimized_ops::SpaceToBatchND(
+      luci_interpreter_pal::SpaceToBatchND(
         op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
         getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
         getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
index a6ec6f2..e06501c 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/SpaceToBatchND.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -34,14 +35,19 @@ void Check(std::initializer_list<int32_t> input_shape,
            std::initializer_list<int32_t> block_shape_data,
            std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
-  Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -55,17 +61,23 @@ void Check<uint8_t>(
   std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
   std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::pair<float, int32_t> input_quant_param =
     quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
-  Tensor block_shape_tensor = makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data);
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>(paddings_shape, paddings_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+                                  input_data, memory_manager.get());
+  Tensor block_shape_tensor =
+    makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
 
   SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -93,10 +105,13 @@ TYPED_TEST(SpaceToBatchNDTest, Simple)
 
 TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
 {
-  Tensor input_tensor =
-    makeInputTensor<DataType::FLOAT32>({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
-  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2});
-  Tensor paddings_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+    {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+  Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+  Tensor paddings_tensor =
+    makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
index fc99937..7c29e8c 100644 (file)
@@ -16,7 +16,7 @@
 
 #include "SpaceToDepth.h"
 #include "Utils.h"
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSpaceToDepth.h"
 
 namespace luci_interpreter
 {
@@ -61,14 +61,14 @@ void SpaceToDepth::execute() const
   switch (input()->element_type())
   {
     case DataType::FLOAT32:
-      tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
-                                          getTensorData<float>(input()), getTensorShape(output()),
-                                          getTensorData<float>(output()));
+      luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+                                         getTensorData<float>(input()), getTensorShape(output()),
+                                         getTensorData<float>(output()));
       break;
     case DataType::U8:
-      tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
-                                          getTensorData<uint8_t>(input()), getTensorShape(output()),
-                                          getTensorData<uint8_t>(output()));
+      luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+                                         getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                         getTensorData<uint8_t>(output()));
       break;
     default:
       throw std::runtime_error("Unsupported type.");
index 77b6655..735c010 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/SpaceToDepth.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -35,10 +36,13 @@ TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
 
 TYPED_TEST(SpaceToDepthTest, SimpleCase)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   constexpr DataType element_type = getElementType<TypeParam>();
   std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
   Shape input_shape{1, 2, 2, 2};
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
   std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
   std::vector<int32_t> output_shape{1, 1, 1, 8};
   Tensor output_tensor = makeOutputTensor(element_type);
@@ -48,6 +52,7 @@ TYPED_TEST(SpaceToDepthTest, SimpleCase)
 
   SpaceToDepth kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
index 0da0f37..1a563f3 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+#include "PALSplit.h"
 
 namespace luci_interpreter
 {
@@ -56,11 +56,11 @@ void Split::execute() const
   params.num_split = _outputs.size();
   params.axis = _axis_value;
 
-#define TF_LITE_SPLIT(scalar)                                                                     \
-  {                                                                                               \
-    VectorOfTensors<scalar, false> all_outputs(_outputs);                                         \
-    tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
-                                 all_outputs.shapes(), all_outputs.data());                       \
+#define TF_LITE_SPLIT(scalar)                                                                    \
+  {                                                                                              \
+    VectorOfTensors<scalar, false> all_outputs(_outputs);                                        \
+    luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+                                all_outputs.shapes(), all_outputs.data());                       \
   }
 
   switch (input()->element_type())
index c558928..74d57ae 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Split.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -32,9 +33,12 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
            std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
            std::vector<std::vector<T>> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   constexpr DataType element_type = getElementType<T>();
-  Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis});
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
 
   std::vector<Tensor> output_tensors;
   output_tensors.reserve(num_splits);
@@ -51,6 +55,10 @@ void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
 
   Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
   kernel.configure();
+  for (int i = 0; i < num_splits; ++i)
+  {
+    memory_manager->allocate_memory(output_tensors[i]);
+  }
   kernel.execute();
 
   for (int i = 0; i < num_splits; ++i)
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644 (file)
index 0000000..2819882
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+               std::vector<Tensor *> outputs)
+  : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  auto num_split = static_cast<int32_t>(_outputs.size());
+  auto sizes_data = getTensorData<int32_t>(size_splits());
+
+  assert(size_splits()->shape().num_dims() == 1);
+  assert(size_splits()->shape().num_elements() == num_split);
+  assert(std::accumulate(sizes_data, sizes_data + num_split, 0) ==
+         input()->shape().dim(_axis_value));
+
+  auto output_shape = input()->shape();
+  for (int32_t i = 0; i < num_split; ++i)
+  {
+    output_shape.dim(_axis_value) = sizes_data[i];
+    _outputs[i]->resize(output_shape);
+  }
+}
+
+void SplitV::execute() const
+{
+  tflite::SplitParams params{};
+  params.num_split = _outputs.size();
+  params.axis = _axis_value;
+
+#define TF_LITE_SPLIT(scalar)                                                                     \
+  {                                                                                               \
+    VectorOfTensors<scalar, false> all_outputs(_outputs);                                         \
+    tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+                                 all_outputs.shapes(), all_outputs.data());                       \
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      TF_LITE_SPLIT(float);
+      break;
+    case DataType::U8:
+      TF_LITE_SPLIT(uint8_t);
+      break;
+    case DataType::S16:
+      TF_LITE_SPLIT(int16_t);
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-interpreter/src/kernels/SplitV.h
new file mode 100644 (file)
index 0000000..92f6288
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SplitV : public Kernel
+{
+public:
+  SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+         std::vector<Tensor *> outputs);
+
+  const Tensor *input() const { return _inputs[0]; }
+  const Tensor *size_splits() const { return _inputs[1]; }
+  const Tensor *axis() const { return _inputs[2]; }
+  Tensor *output(int index) const { return _outputs[index]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644 (file)
index 0000000..aac0567
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, std::initializer_list<int32_t> splits_size,
+           std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data,
+           std::vector<std::vector<T>> output_data)
+{
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  constexpr DataType element_type = getElementType<T>();
+
+  auto num_splits = static_cast<int32_t>(splits_size.size());
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor sizes_tensor =
+    makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+
+  std::vector<Tensor> output_tensors;
+  output_tensors.reserve(num_splits);
+  for (int i = 0; i < num_splits; ++i)
+  {
+    output_tensors.emplace_back(makeOutputTensor(element_type));
+  }
+
+  std::vector<Tensor *> output_tensor_ptrs(num_splits);
+  for (int i = 0; i < num_splits; ++i)
+  {
+    output_tensor_ptrs[i] = &output_tensors[i];
+  }
+
+  SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+  kernel.configure();
+  for (int i = 0; i < num_splits; ++i)
+  {
+    memory_manager->allocate_memory(output_tensors[i]);
+  }
+  kernel.execute();
+
+  for (int i = 0; i < num_splits; ++i)
+  {
+    auto actual_data = extractTensorData<T>(output_tensors[i]);
+    EXPECT_THAT(actual_data,
+                ::testing::ElementsAreArray(output_data[i]));
+  }
+}
+
+template <typename T> class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_CASE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+  Check<TypeParam>(
+    /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+    {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+    {
+      {1, 2, 3, 4, 5, 6, 7, 8, 9},                                             //
+      {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} //
+    });
+  Check<TypeParam>(
+    /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3},
+    {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+    {
+      {1, 2, 3, 10, 11, 12, 19, 20, 21},                                 //
+      {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} //
+    });
+  Check<TypeParam>(
+    /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3},
+    {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+     15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+    {
+      {1, 4, 7, 10, 13, 16, 19, 22, 25},                                 //
+      {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} //
+    });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
index e40a91e..96835fb 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Sqrt.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -29,11 +30,15 @@ using namespace testing;
 void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
            std::initializer_list<float> input_data, std::initializer_list<float> output_data)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Sqrt kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
@@ -58,20 +63,25 @@ TEST(SqrtTest, SimpleSqrt)
 
 TEST(SqrtTest, Input_Output_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S32);
 
   Sqrt kernel(&input_tensor, &output_tensor);
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(AddTest, Invalid_Input_Type_NEG)
+TEST(SqrtTest, Invalid_Input_Type_NEG)
 {
-  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   Sqrt kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 730d640..51662de 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Square.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -29,13 +30,17 @@ using namespace testing;
 
 TEST(SquareTest, Float)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   Shape input_shape{3, 1, 2};
   std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Square kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
index a72eaad..2819c01 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/SquaredDifference.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -29,15 +30,20 @@ using namespace testing;
 
 TEST(SquaredDifferenceTest, Float)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   Shape input_shape{3, 1, 2};
   std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
   std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
-  Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1);
-  Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2);
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
@@ -46,16 +52,21 @@ TEST(SquaredDifferenceTest, Float)
 
 TEST(SquaredDifferenceTest, FloatBroadcast)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   Shape input_shape1{3, 1, 2};
   Shape input_shape2{1};
   std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
   std::vector<float> input_data2{1.0};
-  Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1);
-  Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2);
+  Tensor input_tensor1 =
+    makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+  Tensor input_tensor2 =
+    makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
index 1c81893..d3326fe 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Squeeze.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -31,8 +32,11 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<T> input_data, std::initializer_list<T> output_data,
            std::initializer_list<int32_t> squeeze_dims)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   SqueezeParams params{};
@@ -40,6 +44,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
 
   Squeeze kernel(&input_tensor, &output_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
index 37b0dd8..c6452cd 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
 
 #include <stdexcept>
 
index 66dffca..399cdeb 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/StridedSlice.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -28,6 +29,8 @@ using namespace testing;
 
 TEST(StridedSliceTest, Float)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   Shape input_shape{2, 3, 2};
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   Shape begin_shape{3};
@@ -36,10 +39,13 @@ TEST(StridedSliceTest, Float)
   std::vector<int32_t> end_data{1, 3, 2};
   Shape strides_shape{3};
   std::vector<int32_t> strides_data{1, 1, 1};
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
-  Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
-  Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data);
-  Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+  Tensor begin_tensor =
+    makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+  Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+  Tensor strides_tensor =
+    makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   StridedSliceParams params{};
@@ -52,6 +58,7 @@ TEST(StridedSliceTest, Float)
   StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
                       params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<int32_t> output_shape{3, 2};
@@ -62,6 +69,8 @@ TEST(StridedSliceTest, Float)
 
 TEST(StridedSliceTest, Uint8)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   Shape input_shape{2, 3, 2};
   std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   Shape begin_shape{3};
@@ -70,10 +79,13 @@ TEST(StridedSliceTest, Uint8)
   std::vector<int32_t> end_data{1, 3, 2};
   Shape strides_shape{3};
   std::vector<int32_t> strides_data{1, 1, 1};
-  Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data);
-  Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data);
-  Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data);
-  Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get());
+  Tensor begin_tensor =
+    makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+  Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+  Tensor strides_tensor =
+    makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
 
   StridedSliceParams params{};
@@ -86,6 +98,7 @@ TEST(StridedSliceTest, Uint8)
   StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
                       params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<int32_t> output_shape{3, 2};
index 3c7588d..603c62d 100644 (file)
@@ -18,7 +18,9 @@
 #include "kernels/Sub.h"
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
 
 #include <stdexcept>
 
@@ -74,9 +76,9 @@ void Sub::evalFloat() const
   }
   else
   {
-    tflite::optimized_ops::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
-                               getTensorShape(input2()), getTensorData<float>(input2()),
-                               getTensorShape(output()), getTensorData<float>(output()));
+    luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+                              getTensorShape(input2()), getTensorData<float>(input2()),
+                              getTensorShape(output()), getTensorData<float>(output()));
   }
 }
 
index f560ceb..c189f44 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Sub.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 #include <algorithm>
 
@@ -33,6 +34,14 @@ using std::vector;
 using std::transform;
 using std::initializer_list;
 
+class SubTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
 // for quantized Add, the error shouldn't exceed step
 float GetTolerance(float min, float max)
 {
@@ -40,7 +49,7 @@ float GetTolerance(float min, float max)
   return kQuantizedStep;
 }
 
-TEST(SubTest, Uint8)
+TEST_F(SubTest, Uint8)
 {
   Shape base_shape = {2, 3, 1, 2};
   vector<float> base_data = {-0.3f, 2.3f, 0.9f,  0.5f, 0.8f, -1.1f,
@@ -62,10 +71,10 @@ TEST(SubTest, Uint8)
   pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
   for (size_t i = 0; i < output_data.size(); ++i)
   {
-    Tensor input1_tensor =
-      makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
-    Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
-                                                         quant_param.second, test_data);
+    Tensor input1_tensor = makeInputTensor<DataType::U8>(
+      base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::U8>(
+      test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
     Tensor output_tensor =
       makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
 
@@ -74,6 +83,7 @@ TEST(SubTest, Uint8)
 
     Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -90,10 +100,10 @@ TEST(SubTest, Uint8)
   // Re-run with exchanged inputs.
   for (size_t i = 0; i < output_data.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first,
-                                                         quant_param.second, test_data);
-    Tensor input2_tensor =
-      makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data);
+    Tensor input1_tensor = makeInputTensor<DataType::U8>(
+      test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+    Tensor input2_tensor = makeInputTensor<DataType::U8>(
+      base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
     Tensor output_tensor =
       makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
 
@@ -102,6 +112,7 @@ TEST(SubTest, Uint8)
 
     Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(dequantizeTensorData(output_tensor),
@@ -110,7 +121,7 @@ TEST(SubTest, Uint8)
   }
 }
 
-TEST(SubTest, Float)
+TEST_F(SubTest, Float)
 {
   Shape base_shape = {2, 3, 1, 2};
   vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
@@ -130,8 +141,10 @@ TEST(SubTest, Float)
   vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
   for (size_t i = 0; i < test_shapes.size(); ++i)
   {
-    Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
-    Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+    Tensor input1_tensor =
+      makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+    Tensor input2_tensor =
+      makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
     Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
     SubParams params{};
@@ -139,6 +152,7 @@ TEST(SubTest, Float)
 
     Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
     kernel.configure();
+    _memory_manager->allocate_memory(output_tensor);
     kernel.execute();
 
     EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
@@ -148,10 +162,10 @@ TEST(SubTest, Float)
   }
 }
 
-TEST(SubTest, Input_Output_Type_NEG)
+TEST_F(SubTest, Input_Output_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f});
-  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2});
+  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   SubParams params{};
@@ -161,10 +175,10 @@ TEST(SubTest, Input_Output_Type_NEG)
   EXPECT_ANY_THROW(kernel.configure());
 }
 
-TEST(SubTest, Invalid_Input_Type_NEG)
+TEST_F(SubTest, Invalid_Input_Type_NEG)
 {
-  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1});
-  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2});
+  Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+  Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S64);
 
   SubParams params{};
@@ -172,6 +186,7 @@ TEST(SubTest, Invalid_Input_Type_NEG)
 
   Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
index 1c3d128..c4fa169 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
 
 namespace luci_interpreter
 {
index ef727d6..bfae479 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Tanh.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -27,18 +28,28 @@ namespace
 
 using namespace testing;
 
-TEST(TanhTest, Float)
+class TanhTest : public ::testing::Test
+{
+protected:
+  void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(TanhTest, Float)
 {
   Shape input_shape{1, 2, 4, 1};
   std::vector<float> input_data{
     0, -6, 2,  4, //
     3, -2, 10, 1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Tanh kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -48,7 +59,7 @@ TEST(TanhTest, Float)
   EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
 }
 
-TEST(TanhTest, Uint8)
+TEST_F(TanhTest, Uint8)
 {
   float kMin = -1;
   float kMax = 127.f / 128.f;
@@ -69,13 +80,15 @@ TEST(TanhTest, Uint8)
     0,  -6, 2, 4, //
     -4, -2, 8, 1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first,
-                                                      input_quant_param.second, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second,
+                                  input_data, _memory_manager.get());
   Tensor output_tensor =
     makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
 
   Tanh kernel(&input_tensor, &output_tensor);
   kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   std::vector<float> ref_output_data{
@@ -97,7 +110,7 @@ TEST(TanhTest, Uint8)
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
 }
 
-TEST(TanhTest, InputTypeInvalid_NEG)
+TEST_F(TanhTest, InputTypeInvalid_NEG)
 {
   std::vector<int64_t> input_data{
     0,  -6, 2, 4, //
@@ -113,14 +126,16 @@ TEST(TanhTest, InputTypeInvalid_NEG)
     0,  -6, 2, 4, //
     -4, -2, 8, 1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
 
   Tanh kernel(&input_tensor, &output_tensor);
+  _memory_manager->allocate_memory(output_tensor);
   EXPECT_ANY_THROW(kernel.execute());
 }
 
-TEST(TanhTest, InputOutputMismatch_NEG)
+TEST_F(TanhTest, InputOutputMismatch_NEG)
 {
   std::vector<float> input_data{
     0,  -6, 2, 4, //
@@ -136,7 +151,8 @@ TEST(TanhTest, InputOutputMismatch_NEG)
     0,  -6, 2, 4, //
     -4, -2, 8, 1, //
   };
-  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8);
 
   Tanh kernel(&input_tensor, &output_tensor);
index 831dc42..4d983ad 100644 (file)
@@ -43,6 +43,11 @@ std::vector<float> dequantizeTensorData(const Tensor &tensor)
     std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
     return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
   }
+  if (tensor.element_type() == DataType::S8)
+  {
+    std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+    return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+  }
   else if (tensor.element_type() == DataType::S16)
   {
     // S16 quantization is symmetric, so zero point should be zero.
index c4c73d5..1f5a0c3 100644 (file)
@@ -19,6 +19,7 @@
 #define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
 
 #include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
 
 #include <type_traits>
 
@@ -36,9 +37,11 @@ template <typename T>
 std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
 
 template <DataType DT>
-Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data)
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
+                       IMemoryManager *memory_manager)
 {
   Tensor tensor(DT, shape, {}, "");
+  memory_manager->allocate_memory(tensor);
   tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
   return tensor;
 }
@@ -50,16 +53,18 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeIm
  * @param scale scale of quantized number
  * @param zero_point zero point of quantized number, should be 0 for signed datatypes
  * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
  * @return created tensor
  */
 template <DataType DT>
 Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
-                       const std::vector<float> &data)
+                       const std::vector<float> &data, IMemoryManager *memory_manager)
 {
   using NativeT = typename DataTypeImpl<DT>::Type;
   Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
   std::vector<NativeT> quantized_data =
     quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+  memory_manager->allocate_memory(tensor);
   tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
   return tensor;
 }
@@ -72,12 +77,13 @@ Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
  * @param zero_points zero points of quantized number, should be 0 for signed datatypes
  * @param quantize_dimension dimension to apply quantization along. Usually channels/output channels
  * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
  * @return created tensor
  */
 template <DataType DT>
 Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
                        const std::vector<int32_t> &zero_points, int quantized_dimension,
-                       const std::vector<float> &data)
+                       const std::vector<float> &data, IMemoryManager *memory_manager)
 {
   using NativeT = typename DataTypeImpl<DT>::Type;
   assert(quantized_dimension < shape.num_dims());
@@ -113,6 +119,7 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
                             part_quantized_data.end());
     }
   assert(quantized_data.size() == shape.num_elements());
+  memory_manager->allocate_memory(tensor);
   tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
   return tensor;
 }
@@ -127,12 +134,26 @@ template <typename T> constexpr DataType getElementType()
 {
   if (std::is_same<T, float>::value)
     return DataType::FLOAT32;
+  if (std::is_same<T, double>::value)
+    return DataType::FLOAT64;
   if (std::is_same<T, uint8_t>::value)
     return DataType::U8;
+  if (std::is_same<T, uint16_t>::value)
+    return DataType::U16;
+  if (std::is_same<T, uint32_t>::value)
+    return DataType::U32;
+  if (std::is_same<T, uint64_t>::value)
+    return DataType::U64;
+  if (std::is_same<T, int8_t>::value)
+    return DataType::S8;
+  if (std::is_same<T, int16_t>::value)
+    return DataType::S16;
   if (std::is_same<T, int32_t>::value)
     return DataType::S32;
   if (std::is_same<T, int64_t>::value)
     return DataType::S64;
+  if (std::is_same<T, bool>::value)
+    return DataType::BOOL;
   return DataType::Unknown;
 }
 
@@ -156,8 +177,6 @@ std::vector<T> quantize(const float *data, size_t num_elements, float scale, int
   float q_min{}, q_max{};
   if (std::is_signed<T>::value)
   {
-    // For now, assume that signed type implies signed symmetric quantization.
-    assert(zero_point == 0);
     q_min = -std::numeric_limits<T>::max();
     q_max = std::numeric_limits<T>::max();
   }
index c1a11cd..802d872 100644 (file)
@@ -18,7 +18,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
 
 #include <stdexcept>
 
@@ -37,7 +37,7 @@ void Transpose::configure()
 {
   // Transpose op only supports 1D-4D input arrays.
   int dims = input()->shape().num_dims();
-  const int *perm_data = getTensorData<int32_t>(perm());
+  const int32_t *perm_data = getTensorData<int32_t>(perm());
 
   assert(input()->shape().num_dims() <= 4);
   assert(input()->element_type() == output()->element_type());
@@ -58,8 +58,8 @@ void Transpose::configure()
 void Transpose::execute() const
 {
   tflite::TransposeParams params{};
-  const int *perm_data = getTensorData<int32_t>(perm());
-  const int size = perm()->shape().dim(0);
+  const int32_t *perm_data = getTensorData<int32_t>(perm());
+  const int32_t size = perm()->shape().dim(0);
   params.perm_count = size;
   for (int i = 0; i < size; i++)
     params.perm[i] = perm_data[i];
index f0a915c..1071799 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/Transpose.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -31,13 +32,16 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int
            std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
            std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
-  Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+  Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(element_type);
 
   Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
index 0c70756..1b5f9d9 100644 (file)
@@ -19,7 +19,7 @@
 
 #include "kernels/Utils.h"
 
-#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
 
 #include <stdexcept>
 
@@ -30,8 +30,10 @@ namespace kernels
 {
 
 TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                             const Tensor *bias, Tensor *output, const TransposeConvParams &params)
-  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias}, {output}, params)
+                             const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+                             const TransposeConvParams &params)
+  : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+                                          {output, scratch_tensor}, params)
 {
 }
 
@@ -74,15 +76,18 @@ void TransposeConv::configure()
 
   if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
   {
-    DataType scratch_data_type =
-      input()->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
-    _scratch_tensor =
-      std::make_unique<Tensor>(scratch_data_type, output()->shape(), AffineQuantization{}, "");
+    auto scratch_tensor = getOutputTensors()[1];
+    scratch_tensor->resize(output()->shape());
     const std::vector<double> real_multipliers =
       getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
 
     _quant_multipliers = quantizeMultipliers(real_multipliers);
   }
+  else
+  {
+    auto scratch_tensor = getOutputTensors()[1];
+    scratch_tensor->set_allocatable(false);
+  }
 }
 
 void TransposeConv::execute() const
@@ -111,8 +116,6 @@ void TransposeConv::execute() const
     default:
       throw std::runtime_error("Unsupported type.");
   }
-  if (!!_scratch_tensor)
-    _scratch_tensor->deallocate();
 }
 
 void TransposeConv::evalFloat() const
@@ -148,13 +151,15 @@ void TransposeConv::evalQuantized() const
   op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
   op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
 
+  auto scratch_tensor = getOutputTensors()[1];
+
   tflite::reference_ops::TransposeConv(op_params,                                                //
                                        getTensorShape(input()), getTensorData<uint8>(input()),   //
                                        getTensorShape(filter()), getTensorData<uint8>(filter()), //
                                        getTensorShape(bias()), getTensorData<int32_t>(bias()),   //
                                        getTensorShape(output()), getTensorData<uint8>(output()), //
                                        tflite::RuntimeShape(), nullptr,                          //
-                                       getTensorData<int32_t>(_scratch_tensor.get()));
+                                       getTensorData<int32_t>(scratch_tensor));
 }
 
 void TransposeConv::evalQuantizedPerChannel() const
@@ -163,7 +168,9 @@ void TransposeConv::evalQuantizedPerChannel() const
   const auto *filter_data = getTensorData<uint8_t>(filter());
   const auto *bias_data = getTensorData<int32_t>(bias());
   auto *output_data = getTensorData<uint8_t>(output());
-  auto *scratch_data = getTensorData<int32_t>(_scratch_tensor.get());
+
+  auto scratch_tensor = getOutputTensors()[1];
+  auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
 
   const Shape &input_shape = input()->shape();
   const Shape &filter_shape = filter()->shape();
@@ -186,7 +193,7 @@ void TransposeConv::evalQuantizedPerChannel() const
   int32_t activation_max{};
   calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
 
-  std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int32_t));
+  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
 
   BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
   for (int32_t batch = 0; batch < batches; ++batch)
@@ -255,7 +262,9 @@ void TransposeConv::evalQuantizedS16() const
   const auto *filter_data = getTensorData<int16_t>(filter());
   const auto *bias_data = getTensorData<int64_t>(bias());
   auto *output_data = getTensorData<int16_t>(output());
-  auto *scratch_data = getTensorData<int64_t>(_scratch_tensor.get());
+
+  auto scratch_tensor = getOutputTensors()[1];
+  auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
 
   const Shape &input_shape = input()->shape();
   const Shape &filter_shape = filter()->shape();
@@ -278,7 +287,7 @@ void TransposeConv::evalQuantizedS16() const
   int32_t activation_max{};
   calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
 
-  std::memset(scratch_data, 0, _scratch_tensor->shape().num_elements() * sizeof(int64_t));
+  std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
 
   BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
   for (int32_t batch = 0; batch < batches; ++batch)
index 2e0beec..cea0cf3 100644 (file)
@@ -31,7 +31,8 @@ class TransposeConv : public KernelWithParams<TransposeConvParams>
 {
 public:
   TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
-                const Tensor *bias, Tensor *output, const TransposeConvParams &params);
+                const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+                const TransposeConvParams &params);
 
   ~TransposeConv();
 
@@ -51,8 +52,6 @@ private:
   void evalQuantizedS16() const;
 
 private:
-  std::unique_ptr<Tensor> _scratch_tensor;
-
   int32_t _padding_height{};
   int32_t _padding_width{};
   // The scaling factor from input to output (aka the 'real multiplier') can
index 9bcb015..4856e1b 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "kernels/TransposeConv.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -35,11 +36,18 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
            std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
            int32_t stride_width)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   constexpr DataType element_type = getElementType<T>();
   Tensor output_shape_tensor =
-    makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data);
-  Tensor weight_tensor = makeInputTensor<element_type>(weight_shape, weight_data);
-  Tensor input_data_tensor = makeInputTensor<element_type>(input_shape, input_data);
+    makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+  Tensor weight_tensor =
+    makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+  Tensor input_data_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+  DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+  Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
   Tensor output_tensor = makeOutputTensor(element_type);
 
   TransposeConvParams params{};
@@ -49,17 +57,22 @@ void Check(std::initializer_list<int32_t> output_shape_shape,
 
   if (bias_data.size() != 0)
   {
-    Tensor bias_tensor = makeInputTensor<getElementType<B>()>(bias_shape, bias_data);
+    Tensor bias_tensor =
+      makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
     TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
-                         &output_tensor, params);
+                         &output_tensor, &scratch_tensor, params);
     kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    memory_manager->allocate_memory(scratch_tensor);
     kernel.execute();
   }
   else
   {
     TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
-                         &output_tensor, params);
+                         &output_tensor, &scratch_tensor, params);
     kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    memory_manager->allocate_memory(scratch_tensor);
     kernel.execute();
   }
   EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
@@ -114,6 +127,8 @@ TEST(TransposeConvTest, SimpleBiasTest)
 
 TEST(TransposeConvTest, UInt8)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::vector<float> input_data{1, 2, 3, 4};
   std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
   std::vector<float> bias_data{3, 4};
@@ -131,23 +146,30 @@ TEST(TransposeConvTest, UInt8)
   auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
   auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
 
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::U8>({2, 3, 3, 1}, filter_quant.first,
-                                                       filter_quant.second, filter_data);
-  Tensor bias_tensor =
-    makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data);
-  Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(
+    {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+                                                      0, bias_data, memory_manager.get());
+  Tensor output_shape_tensor =
+    makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
 
+  DataType scratch_data_type =
+    input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+  Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
   TransposeConvParams params{};
   params.padding = Padding::VALID;
   params.stride_height = 2;
   params.stride_width = 2;
 
   TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
-                       &output_tensor, params);
+                       &output_tensor, &scratch_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  memory_manager->allocate_memory(scratch_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
@@ -156,6 +178,8 @@ TEST(TransposeConvTest, UInt8)
 
 TEST(TransposeConvTest, UInt8_CWQ)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   const int32_t output_channels = 2;
   std::vector<float> input_data{1, 2, 3, 4};
   std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
@@ -190,23 +214,30 @@ TEST(TransposeConvTest, UInt8_CWQ)
     bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
   std::vector<int32_t> zerop(output_channels, 0);
 
-  Tensor input_tensor =
-    makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, input_quant.second, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::U8>({output_channels, 3, 3, 1}, filter_scales,
-                                                       filter_zerops, 0, filter_data);
-  Tensor bias_tensor =
-    makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0, bias_data);
-  Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+  Tensor input_tensor = makeInputTensor<DataType::U8>(
+    {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::U8>(
+    {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+                                                      bias_data, memory_manager.get());
+  Tensor output_shape_tensor =
+    makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
 
+  DataType scratch_data_type =
+    input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+  Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
   TransposeConvParams params{};
   params.padding = Padding::VALID;
   params.stride_height = 2;
   params.stride_width = 2;
 
   TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
-                       &output_tensor, params);
+                       &output_tensor, &scratch_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  memory_manager->allocate_memory(scratch_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
@@ -215,6 +246,8 @@ TEST(TransposeConvTest, UInt8_CWQ)
 
 TEST(TransposeConvTest, SInt16)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   std::vector<float> input_data{1, 2, 3, 4};
   std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
   std::vector<float> bias_data{3, 4};
@@ -227,20 +260,30 @@ TEST(TransposeConvTest, SInt16)
     42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
   };
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data);
-  Tensor filter_tensor = makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data);
-  Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+  Tensor filter_tensor =
+    makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+  Tensor bias_tensor =
+    makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+  Tensor output_shape_tensor =
+    makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
 
+  DataType scratch_data_type =
+    input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+  Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
   TransposeConvParams params{};
   params.padding = Padding::VALID;
   params.stride_height = 2;
   params.stride_width = 2;
 
   TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
-                       &output_tensor, params);
+                       &output_tensor, &scratch_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  memory_manager->allocate_memory(scratch_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
@@ -249,6 +292,8 @@ TEST(TransposeConvTest, SInt16)
 
 TEST(TransposeConvTest, SInt16_CWQ_weights)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
   const int output_channels = 2;
   const Shape input_shape{1, 2, 2, 1};
   const Shape filter_shape{output_channels, 3, 3, 1};
@@ -273,21 +318,30 @@ TEST(TransposeConvTest, SInt16_CWQ_weights)
   std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
   const std::vector<int32_t> zerop(2, 0);
 
-  Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data);
-  Tensor filter_tensor =
-    makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0, filter_data);
-  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data);
-  Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data);
+  Tensor input_tensor =
+    makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+  Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+                                                        filter_data, memory_manager.get());
+  Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+                                                      memory_manager.get());
+  Tensor output_shape_tensor =
+    makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
   Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
 
+  DataType scratch_data_type =
+    input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+  Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
   TransposeConvParams params{};
   params.padding = Padding::VALID;
   params.stride_height = 2;
   params.stride_width = 2;
 
   TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
-                       &output_tensor, params);
+                       &output_tensor, &scratch_tensor, params);
   kernel.configure();
+  memory_manager->allocate_memory(output_tensor);
+  memory_manager->allocate_memory(scratch_tensor);
   kernel.execute();
 
   EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
index 6d611e1..4f22c9f 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "kernels/Unpack.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -32,10 +33,12 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
            const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
            std::vector<std::initializer_list<T>> exp_output_data)
 {
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
   constexpr DataType element_type = getElementType<T>();
   const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
 
-  Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+  Tensor input_tensor =
+    makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
   std::vector<Tensor> output_tensors;
   output_tensors.reserve(num_outputs);
   for (int i = 0; i < num_outputs; ++i)
@@ -54,6 +57,10 @@ void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
 
   Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
   kernel.configure();
+  for (int i = 0; i < num_outputs; i++)
+  {
+    memory_manager->allocate_memory(output_tensors[i]);
+  }
   kernel.execute();
 
   for (int i = 0; i < num_outputs; ++i)
index 83faa7d..6e83e37 100644 (file)
@@ -91,7 +91,7 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t
 void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
                                        int32_t *activation_min, int32_t *activation_max)
 {
-  // For now, assume that signed type implies signed symmetric quantization.
+  assert(output->zero_points().size() == 1);
   int32_t qmin{};
   int32_t qmax{};
   switch (output->element_type())
@@ -101,11 +101,11 @@ void calculateActivationRangeQuantized(Activation activation, const Tensor *outp
       qmax = std::numeric_limits<uint8_t>::max();
       break;
     case DataType::S8:
-      assert(output->zero_point() == 0);
       qmin = -std::numeric_limits<int8_t>::max();
       qmax = std::numeric_limits<int8_t>::max();
       break;
     case DataType::S16:
+      // For now, assume that signed int16 type implies signed symmetric quantization.
       assert(output->zero_point() == 0);
       qmin = -std::numeric_limits<int16_t>::max();
       qmax = std::numeric_limits<int16_t>::max();
index d467646..153bd1a 100644 (file)
@@ -49,6 +49,13 @@ void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst)
   copy(const_src, dst);
 }
 
+// TODO: Think about how to allocate memory for output in main graph
+void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph)
+{
+  for (auto tensor : tensors)
+    run_graph->configureAllocations(tensor);
+}
+
 } // namespace
 
 While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
@@ -78,11 +85,15 @@ void While::execute() const
   const auto &cond_inputs = _cond_graph->getInputTensors();
   const auto &cond_outputs = _cond_graph->getOutputTensors();
 
+  configureTensorsAllocations(cond_inputs, _cond_graph);
+
   copy(getInputTensors(), cond_inputs);
 
   const auto &body_inputs = _body_graph->getInputTensors();
   const auto &body_outputs = _body_graph->getOutputTensors();
 
+  configureTensorsAllocations(body_inputs, _body_graph);
+
   while (true)
   {
     _cond_graph->execute();
index a066d2c..cb8f891 100644 (file)
@@ -20,6 +20,7 @@
 #include "kernels/Less.h"
 #include "kernels/While.h"
 #include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
 
 namespace luci_interpreter
 {
@@ -30,14 +31,18 @@ namespace
 
 using namespace testing;
 
-RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond)
+RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond,
+                                IMemoryManager *memory_manager)
 {
-  RuntimeGraph *graph = module->addGraph();
+  RuntimeGraph *graph = module->addGraph(memory_manager);
   Tensor *input =
     graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
   Tensor *output =
     graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, ""));
 
+  memory_manager->allocate_memory(*input);
+  memory_manager->allocate_memory(*output);
+
   graph->setInputTensors({input});
   graph->setOutputTensors({output});
 
@@ -46,14 +51,18 @@ RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *i
   return graph;
 }
 
-RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add)
+RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add,
+                                IMemoryManager *memory_manager)
 {
-  RuntimeGraph *graph = module->addGraph();
+  RuntimeGraph *graph = module->addGraph(memory_manager);
   Tensor *input =
     graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
   Tensor *output =
     graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
 
+  memory_manager->allocate_memory(*input);
+  memory_manager->allocate_memory(*output);
+
   graph->setInputTensors({input});
   graph->setOutputTensors({output});
 
@@ -66,18 +75,22 @@ RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *i
 
 TEST(WhileTest, FloatLoop10)
 {
-  Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1});
+  std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
   Tensor output = makeOutputTensor(DataType::FLOAT32);
 
-  Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10});
-  Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1});
+  Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get());
+  Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
 
   RuntimeModule module(nullptr);
-  RuntimeGraph *cond_graph = buildCondSubgraph(&module, DataType::FLOAT32, &input_cond);
-  RuntimeGraph *body_graph = buildBodySubgraph(&module, DataType::FLOAT32, &input_add);
+  RuntimeGraph *cond_graph =
+    buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get());
+  RuntimeGraph *body_graph =
+    buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get());
 
   While kernel({&input}, {&output}, cond_graph, body_graph);
   kernel.configure();
+  memory_manager->allocate_memory(output);
   kernel.execute();
 
   EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10}));
index 782f467..974283a 100644 (file)
@@ -7,14 +7,23 @@ set(SOURCES
     KernelBuilder.cpp
     ModuleLoader.h
     ModuleLoader.cpp
-    RuntimeToIR.h)
+    RuntimeToIR.h
+    nodes/Builders.h)
 
-add_library(luci_interpreter_loader STATIC ${SOURCES})
-set_target_properties(luci_interpreter_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_include_directories(luci_interpreter_loader PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
-target_link_libraries(luci_interpreter_loader
-    PUBLIC luci_lang luci_interpreter_core
-    PRIVATE luci_interpreter_kernels nncc_common)
+# include kernel specific builders
+macro(REGISTER_KERNEL NODE)
+  list(APPEND SOURCES "nodes/${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+        PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
+        PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common)
 
 if(NOT ENABLE_TEST)
   return()
@@ -24,5 +33,5 @@ nnas_find_package(GTest REQUIRED)
 
 set(TEST_SOURCES KernelBuilder.test.cpp)
 
-GTest_AddTest(luci_interpreter_loader_test ${TEST_SOURCES})
-target_link_libraries(luci_interpreter_loader_test luci_interpreter_loader)
+GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
index ee45ad7..b55e7c5 100644 (file)
@@ -57,6 +57,8 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
       return getNodeDataImpl<DataType::U8>(node, data_size);
     case DataType::FLOAT32:
       return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+    case DataType::S8:
+      return getNodeDataImpl<DataType::S8>(node, data_size);
     case DataType::S16:
       return getNodeDataImpl<DataType::S16>(node, data_size);
     case DataType::S32:
@@ -82,6 +84,7 @@ bool isExecutableNode(const luci::CircleNode *node)
     // The following nodes denote outputs of multiple-output nodes.
     case luci::CircleOpcode::CIRCLEIFOUT:
     case luci::CircleOpcode::CIRCLESPLITOUT:
+    case luci::CircleOpcode::CIRCLESPLITVOUT:
     case luci::CircleOpcode::CIRCLEUNPACKOUT:
     case luci::CircleOpcode::CIRCLEWHILEOUT:
       return false;
@@ -112,9 +115,10 @@ bool isTensorProducingNode(const luci::CircleNode *node)
 GraphLoader::GraphLoader(
   const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
   const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-  std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+  std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
   : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
-    _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+    _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
+    _memory_manager(memory_manager)
 {
 }
 
@@ -156,7 +160,10 @@ void GraphLoader::loadTensors()
       size_t data_size{};
       const void *const_data = getNodeData(const_node, &data_size);
       if (const_data != nullptr)
+      {
+        _memory_manager->allocate_memory(*tensor);
         tensor->writeData(const_data, data_size);
+      }
     }
 
     _node_to_tensor.emplace(node, tensor.get());
@@ -173,6 +180,7 @@ void GraphLoader::initInputOutputTensors() const
   for (size_t i = 0; i < input_nodes.size(); ++i)
   {
     input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+    _memory_manager->allocate_memory(*input_tensors[i]);
   }
   _runtime_graph->setInputTensors(input_tensors);
 
index 89c5bca..fe066ec 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "core/RuntimeGraph.h"
 #include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
 
 #include <loco/IR/Graph.h>
 
@@ -32,7 +33,8 @@ class GraphLoader
 public:
   GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
               const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-              std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+              std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+              IMemoryManager *memory_manager);
 
   void loadTensors();
   void initInputOutputTensors() const;
@@ -42,6 +44,7 @@ private:
   const loco::Graph *_graph;
   RuntimeGraph *_runtime_graph;
   RuntimeToIR &_runtime_to_ir;
+  IMemoryManager *_memory_manager;
 
   const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
   std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
index 4cb8bd6..8483a9a 100644 (file)
  */
 
 #include "loader/KernelBuilder.h"
-
-#include "kernels/Add.h"
-#include "kernels/ArgMax.h"
-#include "kernels/AveragePool2D.h"
-#include "kernels/BatchToSpaceND.h"
-#include "kernels/Cast.h"
-#include "kernels/Concatenation.h"
-#include "kernels/Conv2D.h"
-#include "kernels/DepthToSpace.h"
-#include "kernels/DepthwiseConv2D.h"
-#include "kernels/Div.h"
-#include "kernels/Elu.h"
-#include "kernels/Exp.h"
-#include "kernels/Floor.h"
-#include "kernels/FloorDiv.h"
-#include "kernels/Equal.h"
-#include "kernels/FullyConnected.h"
-#include "kernels/Greater.h"
-#include "kernels/GreaterEqual.h"
-#include "kernels/If.h"
-#include "kernels/InstanceNorm.h"
-#include "kernels/L2Normalize.h"
-#include "kernels/L2Pool2D.h"
-#include "kernels/LeakyRelu.h"
-#include "kernels/Less.h"
-#include "kernels/LessEqual.h"
-#include "kernels/LocalResponseNormalization.h"
-#include "kernels/LogicalAnd.h"
-#include "kernels/LogicalNot.h"
-#include "kernels/LogicalOr.h"
-#include "kernels/Logistic.h"
-#include "kernels/LogSoftmax.h"
-#include "kernels/Maximum.h"
-#include "kernels/MaxPool2D.h"
-#include "kernels/Mean.h"
-#include "kernels/Minimum.h"
-#include "kernels/MirrorPad.h"
-#include "kernels/Mul.h"
-#include "kernels/Neg.h"
-#include "kernels/NotEqual.h"
-#include "kernels/Pack.h"
-#include "kernels/Pad.h"
-#include "kernels/PadV2.h"
-#include "kernels/Pow.h"
-#include "kernels/PRelu.h"
-#include "kernels/Relu.h"
-#include "kernels/Relu6.h"
-#include "kernels/Reshape.h"
-#include "kernels/ResizeBilinear.h"
-#include "kernels/ResizeNearestNeighbor.h"
-#include "kernels/ReverseV2.h"
-#include "kernels/Rsqrt.h"
-#include "kernels/Slice.h"
-#include "kernels/Softmax.h"
-#include "kernels/SpaceToBatchND.h"
-#include "kernels/SpaceToDepth.h"
-#include "kernels/Split.h"
-#include "kernels/StridedSlice.h"
-#include "kernels/Sqrt.h"
-#include "kernels/Square.h"
-#include "kernels/SquaredDifference.h"
-#include "kernels/Squeeze.h"
-#include "kernels/Sub.h"
-#include "kernels/Tanh.h"
-#include "kernels/Unpack.h"
-#include "kernels/Transpose.h"
-#include "kernels/TransposeConv.h"
-#include "kernels/While.h"
+#include "loader/nodes/Builders.h"
 
 #include <stdexcept>
 
-namespace
-{
-
-template <typename CircleNodeOut>
-std::vector<const loco::Node *> collectOutputNodes(const luci::CircleNode *node)
-{
-  std::vector<const CircleNodeOut *> output_nodes;
-  for (const loco::Node *loco_node : loco::succs(node))
-  {
-    output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
-  }
-  std::sort(output_nodes.begin(), output_nodes.end(),
-            [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
-              return node1->index() < node2->index();
-            });
-  return {output_nodes.cbegin(), output_nodes.cend()};
-}
-
-} // namespace
-
 namespace luci_interpreter
 {
 
-// TODO move to anonymous namespace
-enum class KB
+#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
+#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
+
+// This enum is auxiliary.
+// It is a duplicate of luci::CircleOpcode but initialized with CLASS instead of OPCODE,
+// because the list of target operators is in the format of CLASS names
+enum class BuilderId
 {
-  ABC,
-  DEF,
-  GHIJ,
-  KLMN,
-  OPQR,
-  STUV,
-  WXYZ,
+#include <luci/IR/CircleNodes.lst>
+  Size // casts to count of values in BuilderId enum
 };
 
-#define DECLARE_VISIT(CLASS) std::unique_ptr<Kernel> visit(const luci::CLASS *) override
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
 
-template <KB kb> class KernelBuilderLet;
+/**
+ * @brief Registry of kernel builders
+ *
+ * This class contains mapping from Opcodes to kernel builder functions
+ */
 
-template <>
-class KernelBuilderLet<KB::ABC> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                  public KernelBuilderHelper
+class KernelBuilderRegistry
 {
 public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
-  {
-  }
+  using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
+                                                    KernelBuilderHelper &);
 
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleAdd);
-  DECLARE_VISIT(CircleArgMax);
-  DECLARE_VISIT(CircleAveragePool2D);
-  DECLARE_VISIT(CircleBatchToSpaceND);
-  DECLARE_VISIT(CircleCast);
-  DECLARE_VISIT(CircleConcatenation);
-  DECLARE_VISIT(CircleConst);
-  DECLARE_VISIT(CircleConv2D);
-};
-
-template <>
-class KernelBuilderLet<KB::DEF> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                  public KernelBuilderHelper
-{
-public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+  KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
   {
-  }
+#define REGISTER_KERNEL(name) \
+  register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
 
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleDepthToSpace);
-  DECLARE_VISIT(CircleDepthwiseConv2D);
-  DECLARE_VISIT(CircleDiv);
-  DECLARE_VISIT(CircleElu);
-  DECLARE_VISIT(CircleEqual);
-  DECLARE_VISIT(CircleExp);
-  DECLARE_VISIT(CircleFloor);
-  DECLARE_VISIT(CircleFloorDiv);
-  DECLARE_VISIT(CircleFullyConnected);
-};
+#include "KernelsToBuild.lst"
 
-template <>
-class KernelBuilderLet<KB::GHIJ> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                   public KernelBuilderHelper
-{
-public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
-  {
+#undef REGISTER_KERNEL
   }
 
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleGreater);
-  DECLARE_VISIT(CircleGreaterEqual);
-  DECLARE_VISIT(CircleIf);
-  DECLARE_VISIT(CircleInput);
-  DECLARE_VISIT(CircleInstanceNorm);
-};
-
-template <>
-class KernelBuilderLet<KB::KLMN> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                   public KernelBuilderHelper
-{
-public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+  KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
   {
+    return _operator_builders.at(size_t(opcode));
   }
 
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleL2Normalize);
-  DECLARE_VISIT(CircleL2Pool2D);
-  DECLARE_VISIT(CircleLeakyRelu);
-  DECLARE_VISIT(CircleLess);
-  DECLARE_VISIT(CircleLessEqual);
-  DECLARE_VISIT(CircleLocalResponseNormalization);
-  DECLARE_VISIT(CircleLogSoftmax);
-  DECLARE_VISIT(CircleLogicalAnd);
-  DECLARE_VISIT(CircleLogicalNot);
-  DECLARE_VISIT(CircleLogicalOr);
-  DECLARE_VISIT(CircleLogistic);
-  DECLARE_VISIT(CircleMaxPool2D);
-  DECLARE_VISIT(CircleMaximum);
-  DECLARE_VISIT(CircleMean);
-  DECLARE_VISIT(CircleMinimum);
-  DECLARE_VISIT(CircleMirrorPad);
-  DECLARE_VISIT(CircleMul);
-  DECLARE_VISIT(CircleNeg);
-  DECLARE_VISIT(CircleNotEqual);
-};
+private:
+  std::vector<KernelBuilderFunc *> _operator_builders;
 
-template <>
-class KernelBuilderLet<KB::OPQR> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                   public KernelBuilderHelper
-{
-public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+  void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
   {
+    // Since BuilderId is a duplicate of luci::CircleOpcode,
+    // size_t(id) is equal to size_t(corresponding operation opcode).
+    assert(size_t(id) < _operator_builders.size());
+    _operator_builders[size_t(id)] = func;
   }
-
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleOutput);
-  DECLARE_VISIT(CirclePRelu);
-  DECLARE_VISIT(CirclePack);
-  DECLARE_VISIT(CirclePad);
-  DECLARE_VISIT(CirclePadV2);
-  DECLARE_VISIT(CirclePow);
-  DECLARE_VISIT(CircleRelu);
-  DECLARE_VISIT(CircleRelu6);
-  DECLARE_VISIT(CircleReshape);
-  DECLARE_VISIT(CircleResizeBilinear);
-  DECLARE_VISIT(CircleResizeNearestNeighbor);
-  DECLARE_VISIT(CircleReverseV2);
-  DECLARE_VISIT(CircleRsqrt);
 };
 
-template <>
-class KernelBuilderLet<KB::STUV> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                   public KernelBuilderHelper
+KernelBuilder::KernelBuilder(
+  const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+  const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+  : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
 {
-public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
-  {
-  }
-
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleSlice);
-  DECLARE_VISIT(CircleSoftmax);
-  DECLARE_VISIT(CircleSpaceToBatchND);
-  DECLARE_VISIT(CircleSpaceToDepth);
-  DECLARE_VISIT(CircleSplit);
-  DECLARE_VISIT(CircleSqrt);
-  DECLARE_VISIT(CircleSquare);
-  DECLARE_VISIT(CircleSquaredDifference);
-  DECLARE_VISIT(CircleSqueeze);
-  DECLARE_VISIT(CircleStridedSlice);
-  DECLARE_VISIT(CircleSub);
-  DECLARE_VISIT(CircleTanh);
-  DECLARE_VISIT(CircleTranspose);
-  DECLARE_VISIT(CircleTransposeConv);
-  DECLARE_VISIT(CircleUnpack);
-};
+  _builder_registry = std::make_unique<KernelBuilderRegistry>();
+}
 
-template <>
-class KernelBuilderLet<KB::WXYZ> : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>,
-                                   public KernelBuilderHelper
+KernelBuilder::~KernelBuilder()
 {
-public:
-  KernelBuilderLet(
-    const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
-  {
-  }
-
-public:
-  std::unique_ptr<Kernel> visit(const luci::CircleNode *) { return nullptr; }
-
-public:
-  DECLARE_VISIT(CircleWhile);
-};
-
-#undef DECLARE_VISIT
+  // Need to define in this CPP to hide KernelBuilderRegistry internals.
+  // This destructor deletes _builder_registry
+}
 
 std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
 {
-#define VISIT_KB(GRP)                                                          \
-  do                                                                           \
-  {                                                                            \
-    KernelBuilderLet<KB::GRP> kbl(graph_to_runtime_graph(), node_to_tensor()); \
-    auto ret = node->accept(&kbl);                                             \
-    if (ret != nullptr)                                                        \
-      return ret;                                                              \
-  } while (false)
-
-  VISIT_KB(ABC);
-  VISIT_KB(DEF);
-  VISIT_KB(GHIJ);
-  VISIT_KB(KLMN);
-  VISIT_KB(OPQR);
-  VISIT_KB(STUV);
-  VISIT_KB(WXYZ);
+  auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
+  if (specific_builder != nullptr)
+    return specific_builder(node, *this);
 
-#undef VISIT_KB
   std::string msg = "Unsupported operator: ";
   msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
   throw std::invalid_argument(msg.c_str());
 }
 
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleAdd *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  AddParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Add>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleArgMax *node)
-{
-  assert(node->arity() == 2);
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *axis = getInputTensor(node->dimension());
-  Tensor *output = getOutputTensor(node);
-
-  ArgMaxParams params{};
-  params.output_type = node->output_type();
-
-  return std::make_unique<kernels::ArgMax>(input, axis, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleAveragePool2D *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->value());
-  Tensor *output = getOutputTensor(node);
-
-  Pool2DParams params{};
-  params.padding = node->padding();
-  params.filter_height = node->filter()->h();
-  params.filter_width = node->filter()->w();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::AveragePool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleBatchToSpaceND *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *block_shape = getInputTensor(node->block_shape());
-  const Tensor *crops = getInputTensor(node->crops());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleCast *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Cast>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConcatenation *node)
-{
-  std::vector<const Tensor *> inputs(node->numValues());
-  for (uint32_t i = 0; i < node->numValues(); ++i)
-  {
-    inputs[i] = getInputTensor(node->values(i));
-  }
-  Tensor *output = getOutputTensor(node);
-
-  ConcatenationParams params{};
-  params.axis = node->axis();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConst *)
-{
-  throw std::runtime_error("Const node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::ABC>::visit(const luci::CircleConv2D *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *filter = getInputTensor(node->filter());
-  const Tensor *bias = getInputTensor(node->bias());
-  Tensor *output = getOutputTensor(node);
-
-  Conv2DParams params{};
-  params.padding = node->padding();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.dilation_height_factor = node->dilation()->h();
-  params.dilation_width_factor = node->dilation()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDepthToSpace *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->input());
-  Tensor *output = getOutputTensor(node);
-
-  DepthToSpaceParams params{};
-  params.block_size = node->block_size();
-
-  return std::make_unique<kernels::DepthToSpace>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDepthwiseConv2D *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *filter = getInputTensor(node->filter());
-  const Tensor *bias = getInputTensor(node->bias());
-  Tensor *output = getOutputTensor(node);
-
-  DepthwiseConv2DParams params{};
-  params.padding = node->padding();
-  params.depth_multiplier = node->depthMultiplier();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.dilation_height_factor = node->dilation()->h();
-  params.dilation_width_factor = node->dilation()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleDiv *node)
-{
-  assert(node->arity() == 2);
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  DivParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Div>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleElu *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->features());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Elu>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleEqual *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Equal>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleExp *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Exp>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFloor *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Floor>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFloorDiv *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::FloorDiv>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::DEF>::visit(const luci::CircleFullyConnected *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *weights = getInputTensor(node->weights());
-  const Tensor *bias = getOptionalInputTensor(node->bias());
-  Tensor *output = getOutputTensor(node);
-
-  FullyConnectedParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleGreater *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Greater>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleGreaterEqual *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::GreaterEqual>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleIf *node)
-{
-  auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
-  assert(node->arity() == 1 + node->input_count());
-  assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
-  const Tensor *cond = getInputTensor(node->cond());
-  std::vector<const Tensor *> inputs(node->input_count());
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-  {
-    inputs[i] = getInputTensor(node->input(i));
-  }
-  std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
-  RuntimeGraph *then_graph = getRuntimeGraph(node->then_graph());
-  RuntimeGraph *else_graph = getRuntimeGraph(node->else_graph());
-
-  return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
-                                       else_graph);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleInstanceNorm *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *gamma = getInputTensor(node->gamma());
-  const Tensor *beta = getInputTensor(node->beta());
-
-  Tensor *output = getOutputTensor(node);
-
-  InstanceNormParams params{};
-  params.epsilon = node->epsilon();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::GHIJ>::visit(const luci::CircleInput *)
-{
-  throw std::runtime_error("Input node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleL2Normalize *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  L2NormParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::L2Normalize>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleL2Pool2D *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->value());
-  Tensor *output = getOutputTensor(node);
-
-  Pool2DParams params{};
-  params.padding = node->padding();
-  params.filter_height = node->filter()->h();
-  params.filter_width = node->filter()->w();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::L2Pool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLeakyRelu *node)
-{
-  assert(node->arity() == 1);
-  const Tensor *input = getInputTensor(node->features());
-  Tensor *output = getOutputTensor(node);
-
-  LeakyReluParams params{};
-  params.alpha = node->alpha();
-
-  return std::make_unique<kernels::LeakyRelu>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLess *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Less>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLessEqual *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::LessEqual>(x, y, output);
-}
-
-std::unique_ptr<Kernel>
-KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLocalResponseNormalization *node)
-{
-  assert(node->arity() == 1);
-  const Tensor *input = getInputTensor(node->input());
-  Tensor *output = getOutputTensor(node);
-
-  LocalResponseNormalizationParams params{};
-  params.radius = node->radius();
-  params.bias = node->bias();
-  params.alpha = node->alpha();
-  params.beta = node->beta();
-
-  return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalAnd *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalNot *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::LogicalNot>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogicalOr *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::LogicalOr>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogistic *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Logistic>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleLogSoftmax *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->logits());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::LogSoftmax>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMaximum *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Maximum>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMaxPool2D *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->value());
-  Tensor *output = getOutputTensor(node);
-
-  Pool2DParams params{};
-  params.padding = node->padding();
-  params.filter_height = node->filter()->h();
-  params.filter_width = node->filter()->w();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::MaxPool2D>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMean *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *axes = getInputTensor(node->reduction_indices());
-  Tensor *output = getOutputTensor(node);
-
-  ReducerParams params{};
-  params.keep_dims = node->keep_dims();
-
-  return std::make_unique<kernels::Mean>(input, axes, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMinimum *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Minimum>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMirrorPad *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *paddings = getInputTensor(node->paddings());
-  Tensor *output = getOutputTensor(node);
-
-  MirrorPadParams params{};
-  params.mode = node->mode();
-
-  return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleMul *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  MulParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Mul>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleNeg *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Neg>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::KLMN>::visit(const luci::CircleNotEqual *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *x = getInputTensor(node->x());
-  const Tensor *y = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::NotEqual>(x, y, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleOutput *)
-{
-  throw std::runtime_error("Output node cannot be executed.");
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePack *node)
-{
-  assert(node->arity() == node->values_count());
-
-  std::vector<const Tensor *> inputs(node->values_count());
-  for (uint32_t i = 0; i < node->values_count(); ++i)
-  {
-    inputs[i] = getInputTensor(node->values(i));
-  }
-  Tensor *output = getOutputTensor(node);
-
-  PackParams params{};
-  params.axis = node->axis();
-  params.values_count = node->values_count();
-
-  return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePad *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *paddings = getInputTensor(node->paddings());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Pad>(input, paddings, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePadV2 *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *paddings = getInputTensor(node->paddings());
-  const Tensor *constant_values = getInputTensor(node->constant_values());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePow *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Pow>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CirclePRelu *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *alpha = getInputTensor(node->alpha());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::PRelu>(input, alpha, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRelu *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->features());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Relu>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRelu6 *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->features());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Relu6>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleReshape *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->tensor());
-  const Tensor *shape = getInputTensor(node->shape());
-  Tensor *output = getOutputTensor(node);
-
-  // NOTE 'newShape' attribute is ignored.
-  return std::make_unique<kernels::Reshape>(input, shape, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleResizeBilinear *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *size = getInputTensor(node->size());
-  Tensor *output = getOutputTensor(node);
-
-  ResizeBilinearParams params{};
-  params.align_corners = node->align_corners();
-  params.half_pixel_centers = node->half_pixel_centers();
-
-  return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
-}
-
-std::unique_ptr<Kernel>
-KernelBuilderLet<KB::OPQR>::visit(const luci::CircleResizeNearestNeighbor *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *size = getInputTensor(node->size());
-  Tensor *output = getOutputTensor(node);
-
-  ResizeNearestNeighborParams params{};
-  params.align_corners = node->align_corners();
-  // TODO update half_pixel_centers after CircleResizeNearestNeighbor updated
-  // Current CircleResizeNearestNeighbor don't have half_pixel_centers.
-  // default value on current is false.
-  // it need to be updated when CircleResizeNearestNeighbor updated.
-  params.half_pixel_centers = false;
-
-  return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleReverseV2 *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->tensor());
-  const Tensor *axes = getInputTensor(node->axis());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::ReverseV2>(input, axes, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::OPQR>::visit(const luci::CircleRsqrt *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Rsqrt>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSlice *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *begin = getInputTensor(node->begin());
-  const Tensor *size = getInputTensor(node->size());
-
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Slice>(input, begin, size, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSoftmax *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->logits());
-  Tensor *output = getOutputTensor(node);
-
-  SoftmaxParams params{};
-  params.beta = node->beta();
-
-  return std::make_unique<kernels::Softmax>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSpaceToBatchND *node)
-{
-  assert(node->arity() == 3);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *block_shape = getInputTensor(node->block_shape());
-  const Tensor *paddings = getInputTensor(node->paddings());
-
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
-  ;
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSpaceToDepth *node)
-{
-  assert(node->arity() == 1);
-  const Tensor *input = getInputTensor(node->input());
-
-  Tensor *output = getOutputTensor(node);
-
-  SpaceToDepthParams params{};
-  params.block_size = node->block_size();
-
-  return std::make_unique<kernels::SpaceToDepth>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSplit *node)
-{
-  auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
-  assert(node->arity() == 2);
-  assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
-
-  const Tensor *axis = getInputTensor(node->split_dim());
-  const Tensor *input = getInputTensor(node->input());
-  std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
-  // NOTE 'num_splits' attribute is ignored.
-  return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSqrt *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Sqrt>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSquare *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Square>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSquaredDifference *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSqueeze *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->input());
-  Tensor *output = getOutputTensor(node);
-
-  SqueezeParams params{};
-  params.squeeze_dims = node->squeeze_dims();
-
-  return std::make_unique<kernels::Squeeze>(input, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleStridedSlice *node)
-{
-  assert(node->arity() == 4);
-
-  const Tensor *input = getInputTensor(node->input());
-  const Tensor *begin = getInputTensor(node->begin());
-  const Tensor *end = getInputTensor(node->end());
-  const Tensor *strides = getInputTensor(node->strides());
-
-  Tensor *output = getOutputTensor(node);
-
-  StridedSliceParams params{};
-  params.begin_mask = node->begin_mask();
-  params.ellipsis_mask = node->ellipsis_mask();
-  params.end_mask = node->end_mask();
-  params.new_axis_mask = node->new_axis_mask();
-  params.shrink_axis_mask = node->shrink_axis_mask();
-
-  return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleSub *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input1 = getInputTensor(node->x());
-  const Tensor *input2 = getInputTensor(node->y());
-  Tensor *output = getOutputTensor(node);
-
-  SubParams params{};
-  params.activation = node->fusedActivationFunction();
-
-  return std::make_unique<kernels::Sub>(input1, input2, output, params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTanh *node)
-{
-  assert(node->arity() == 1);
-
-  const Tensor *input = getInputTensor(node->x());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Tanh>(input, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTranspose *node)
-{
-  assert(node->arity() == 2);
-
-  const Tensor *input = getInputTensor(node->a());
-  const Tensor *perm = getInputTensor(node->perm());
-  Tensor *output = getOutputTensor(node);
-
-  return std::make_unique<kernels::Transpose>(input, perm, output);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleTransposeConv *node)
-{
-  assert(node->arity() == 4);
-
-  const Tensor *input_sizes = getInputTensor(node->inputSizes());
-  const Tensor *filter = getInputTensor(node->filter());
-  const Tensor *out_backprop = getInputTensor(node->outBackprop());
-  const Tensor *bias = getOptionalInputTensor(node->bias());
-
-  Tensor *output = getOutputTensor(node);
-
-  TransposeConvParams params{};
-  params.padding = node->padding();
-  params.stride_height = node->stride()->h();
-  params.stride_width = node->stride()->w();
-
-  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
-                                                  params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::STUV>::visit(const luci::CircleUnpack *node)
-{
-  auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
-  assert(node->arity() == 1);
-  assert(output_nodes.size() == static_cast<size_t>(node->num()));
-
-  const Tensor *input = getInputTensor(node->value());
-  std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
-  UnpackParams params{};
-  params.axis = node->axis();
-
-  // NOTE 'num' attribute is ignored.
-  return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
-}
-
-std::unique_ptr<Kernel> KernelBuilderLet<KB::WXYZ>::visit(const luci::CircleWhile *node)
-{
-  auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
-  assert(node->arity() == node->input_count());
-  assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
-
-  std::vector<const Tensor *> inputs(node->input_count());
-  for (uint32_t i = 0; i < node->input_count(); ++i)
-  {
-    inputs[i] = getInputTensor(node->input(i));
-  }
-  std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
-
-  RuntimeGraph *cond_graph = getRuntimeGraph(node->cond_graph());
-  RuntimeGraph *body_graph = getRuntimeGraph(node->body_graph());
-
-  return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
-                                          body_graph);
-}
-
 } // namespace luci_interpreter
index 406c41e..b1f3833 100644 (file)
 namespace luci_interpreter
 {
 
+class KernelBuilderRegistry;
+
 class KernelBuilder : public KernelBuilderHelper
 {
 public:
   KernelBuilder(
     const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
-    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
-    : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
-  {
-  }
+    const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+  ~KernelBuilder();
 
   std::unique_ptr<Kernel> build(const luci::CircleNode *node);
+
+private:
+  std::unique_ptr<KernelBuilderRegistry> _builder_registry;
 };
 
 } // namespace luci_interpreter
index d861124..7a457a6 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "loader/GraphLoader.h"
 #include "loader/KernelBuilder.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
 
 #include <kernels/Add.h>
 #include <kernels/ArgMax.h>
@@ -68,6 +69,7 @@
 #include <kernels/Softmax.h>
 #include <kernels/SpaceToDepth.h>
 #include <kernels/Split.h>
+#include <kernels/SplitV.h>
 #include <kernels/Sqrt.h>
 #include <kernels/SquaredDifference.h>
 #include <kernels/Squeeze.h>
@@ -91,6 +93,9 @@ class KernelBuilderTest : public Test
 {
 protected:
   luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+  void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
 
   template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
   {
@@ -114,10 +119,11 @@ protected:
   {
     std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
 
-    RuntimeGraph runtime_graph(nullptr);
+    RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
+    graph_to_runtime_graph[&_graph] = &runtime_graph;
     RuntimeToIR runtime_to_ir;
     GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
-                             _node_to_tensor);
+                             _node_to_tensor, _memory_manager.get());
     graph_loader.loadTensors();
 
     KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
@@ -1091,6 +1097,31 @@ TEST_F(KernelBuilderTest, Split)
   checkTensor(kernel->output(1), output2);
 }
 
+TEST_F(KernelBuilderTest, SplitV)
+{
+  auto *input = createInputNode();
+  auto *size_splits = createInputNode();
+  auto *axis = createInputNode();
+  auto *op = createNode<luci::CircleSplitV>();
+  auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
+  auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
+
+  op->input(input);
+  op->size_splits(size_splits);
+  op->split_dim(axis);
+
+  op->num_split(2);
+
+  auto kernel = buildKernel<kernels::SplitV>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->input(), input);
+  checkTensor(kernel->size_splits(), size_splits);
+  checkTensor(kernel->axis(), axis);
+  checkTensor(kernel->output(0), output0);
+  checkTensor(kernel->output(1), output1);
+}
+
 TEST_F(KernelBuilderTest, Sqrt)
 {
   auto *input = createInputNode();
index 4517d1f..d6fb253 100644 (file)
@@ -39,7 +39,7 @@ public:
   {
   }
 
-protected:
+public:
   const Tensor *getInputTensor(const loco::Node *node) const;
   const Tensor *getOptionalInputTensor(const loco::Node *node) const;
 
@@ -48,7 +48,7 @@ protected:
 
   RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
 
-protected:
+public:
   const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
   {
     return _graph_to_runtime_graph;
@@ -64,6 +64,21 @@ private:
   const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
 };
 
+template <typename CircleNodeOut>
+std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
+{
+  std::vector<const CircleNodeOut *> output_nodes;
+  for (const loco::Node *loco_node : loco::succs(node))
+  {
+    output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+  }
+  std::sort(output_nodes.begin(), output_nodes.end(),
+            [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+              return node1->index() < node2->index();
+            });
+  return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
 } // namespace luci_interpreter
 
 #endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
index ff211bf..2f278b0 100644 (file)
@@ -23,9 +23,10 @@ namespace luci_interpreter
 
 ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
                            RuntimeToIR &runtime_to_ir,
-                           std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+                           std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+                           IMemoryManager *memory_manager)
   : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
-    _node_to_tensor(node_to_tensor)
+    _node_to_tensor(node_to_tensor), _memory_manager(memory_manager)
 {
 }
 
@@ -35,14 +36,14 @@ void ModuleLoader::load()
   // process for control flow nodes.
   for (size_t i = 0; i < _module->size(); ++i)
   {
-    _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph());
+    _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
   }
   for (size_t i = 0; i < _module->size(); ++i)
   {
     const loco::Graph *graph = _module->graph(i);
     RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
     GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
-                       _node_to_tensor);
+                       _node_to_tensor, _memory_manager);
     loader.loadTensors();
     loader.initInputOutputTensors();
     loader.loadOperators();
index 1af0ed7..11326a2 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "core/RuntimeModule.h"
 #include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
 
 #include <luci/IR/Module.h>
 
@@ -32,11 +33,13 @@ class ModuleLoader
 public:
   ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
                RuntimeToIR &runtime_to_ir,
-               std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+               std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+               IMemoryManager *memory_manager);
 
   void load();
 
 private:
+  IMemoryManager *_memory_manager;
   const luci::Module *_module;
   RuntimeModule *_runtime_module;
   RuntimeToIR &_runtime_to_ir;
diff --git a/compiler/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644 (file)
index 0000000..decccaa
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleAdd *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  AddParams params{};
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644 (file)
index 0000000..0ee3677
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleArgMax *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *axis = helper.getInputTensor(node->dimension());
+  Tensor *output = helper.getOutputTensor(node);
+
+  ArgMaxParams params{};
+  params.output_type = node->output_type();
+
+  return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644 (file)
index 0000000..5bc37bd
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
+                                                         KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleAveragePool2D *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->value());
+  Tensor *output = helper.getOutputTensor(node);
+
+  Pool2DParams params{};
+  params.padding = node->padding();
+  params.filter_height = node->filter()->h();
+  params.filter_width = node->filter()->w();
+  params.stride_height = node->stride()->h();
+  params.stride_width = node->stride()->w();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::AveragePool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644 (file)
index 0000000..33d0e2d
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
+                                                          KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleBatchToSpaceND *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+  const Tensor *crops = helper.getInputTensor(node->crops());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-interpreter/src/loader/nodes/Builders.h
new file mode 100644 (file)
index 0000000..eab2840
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H

#include "loader/KernelBuilderHelper.h"

#include "luci/IR/CircleNodes.h"

namespace luci_interpreter
{

// X-macro pattern: for every operation listed in KernelsToBuild.lst, the
// REGISTER_KERNEL(name) entry expands to the declaration of a builder
// function build_kernel_Circle<name>(). The matching definitions live in
// the per-operation .cpp files in this directory; the kernel-builder
// dispatch uses these declarations to wire nodes to builders.
#define REGISTER_KERNEL(name)                                                            \
  std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
                                                    KernelBuilderHelper &helper);

#include "KernelsToBuild.lst"

#undef REGISTER_KERNEL

} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
diff --git a/compiler/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644 (file)
index 0000000..21ea5ce
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleCast *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644 (file)
index 0000000..7823a99
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
+                                                         KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleConcatenation *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  std::vector<const Tensor *> inputs(node->numValues());
+  for (uint32_t i = 0; i < node->numValues(); ++i)
+  {
+    inputs[i] = helper.getInputTensor(node->values(i));
+  }
+  Tensor *output = helper.getOutputTensor(node);
+
+  ConcatenationParams params{};
+  params.axis = node->axis();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644 (file)
index 0000000..71c8ef3
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleConv2D *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *filter = helper.getInputTensor(node->filter());
+  const Tensor *bias = helper.getInputTensor(node->bias());
+  Tensor *output = helper.getOutputTensor(node);
+
+  auto im2col =
+    std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+  im2col->set_observable(false);
+  im2col->set_data_buffer(nullptr);
+  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
+
+  Conv2DParams params{};
+  params.padding = node->padding();
+  params.stride_height = node->stride()->h();
+  params.stride_width = node->stride()->w();
+  params.dilation_height_factor = node->dilation()->h();
+  params.dilation_width_factor = node->dilation()->w();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644 (file)
index 0000000..0310fb2
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
+                                                        KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleDepthToSpace *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  Tensor *output = helper.getOutputTensor(node);
+
+  DepthToSpaceParams params{};
+  params.block_size = node->block_size();
+
+  return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644 (file)
index 0000000..c2f0346
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
+                                                           KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *filter = helper.getInputTensor(node->filter());
+  const Tensor *bias = helper.getInputTensor(node->bias());
+  Tensor *output = helper.getOutputTensor(node);
+
+  DepthwiseConv2DParams params{};
+  params.padding = node->padding();
+  params.depth_multiplier = node->depthMultiplier();
+  params.stride_height = node->stride()->h();
+  params.stride_width = node->stride()->w();
+  params.dilation_height_factor = node->dilation()->h();
+  params.dilation_width_factor = node->dilation()->w();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644 (file)
index 0000000..56c2e98
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleDiv *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  DivParams params{};
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644 (file)
index 0000000..98ee78b
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleElu *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->features());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Elu>(input, output);
+}
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644 (file)
index 0000000..649d9bf
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+
+{
+  const auto *node = dynamic_cast<const luci::CircleEqual *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644 (file)
index 0000000..411d142
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleExp *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644 (file)
index 0000000..6d8435f
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleFloor *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644 (file)
index 0000000..cae2e18
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleFloorDiv *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644 (file)
index 0000000..2917598
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
+                                                          KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleFullyConnected *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *weights = helper.getInputTensor(node->weights());
+  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+  Tensor *output = helper.getOutputTensor(node);
+
+  FullyConnectedParams params{};
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644 (file)
index 0000000..3db11b8
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleGreater *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644 (file)
index 0000000..dbe051d
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
+                                                        KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleGreaterEqual *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644 (file)
index 0000000..5983f4d
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
+                                              KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleIf *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+  assert(node->arity() == 1 + node->input_count());
+  assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+  const Tensor *cond = helper.getInputTensor(node->cond());
+  std::vector<const Tensor *> inputs(node->input_count());
+  for (uint32_t i = 0; i < node->input_count(); ++i)
+  {
+    inputs[i] = helper.getInputTensor(node->input(i));
+  }
+  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+  RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
+  RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
+
+  return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+                                       else_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644 (file)
index 0000000..0a8fb85
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
+                                                        KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleInstanceNorm *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *gamma = helper.getInputTensor(node->gamma());
+  const Tensor *beta = helper.getInputTensor(node->beta());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  InstanceNormParams params{};
+  params.epsilon = node->epsilon();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644 (file)
index 0000000..05f9202
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
+                                                       KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleL2Normalize *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  L2NormParams params{};
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644 (file)
index 0000000..0e70afa
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleL2Pool2D *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->value());
+  Tensor *output = helper.getOutputTensor(node);
+
+  Pool2DParams params{};
+  params.padding = node->padding();
+  params.filter_height = node->filter()->h();
+  params.filter_width = node->filter()->w();
+  params.stride_height = node->stride()->h();
+  params.stride_width = node->stride()->w();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644 (file)
index 0000000..7b229ad
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLeakyRelu *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+  const Tensor *input = helper.getInputTensor(node->features());
+  Tensor *output = helper.getOutputTensor(node);
+
+  LeakyReluParams params{};
+  params.alpha = node->alpha();
+
+  return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644 (file)
index 0000000..81156f2
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLess *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644 (file)
index 0000000..82141e5
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLessEqual *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644 (file)
index 0000000..a12dce0
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
+                                              KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+  const Tensor *input = helper.getInputTensor(node->input());
+  Tensor *output = helper.getOutputTensor(node);
+
+  LocalResponseNormalizationParams params{};
+  params.radius = node->radius();
+  params.bias = node->bias();
+  params.alpha = node->alpha();
+  params.beta = node->beta();
+
+  return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644 (file)
index 0000000..6cf547a
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLogSoftmax *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->logits());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644 (file)
index 0000000..2c9549f
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLogicalAnd *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644 (file)
index 0000000..3d327d6
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLogicalNot *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644 (file)
index 0000000..50566bb
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLogicalOr *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644 (file)
index 0000000..e4160ed
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleLogistic *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644 (file)
index 0000000..914f228
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleMaxPool2D *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->value());
+  Tensor *output = helper.getOutputTensor(node);
+
+  Pool2DParams params{};
+  params.padding = node->padding();
+  params.filter_height = node->filter()->h();
+  params.filter_width = node->filter()->w();
+  params.stride_height = node->stride()->h();
+  params.stride_width = node->stride()->w();
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644 (file)
index 0000000..dc50d67
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleMaximum *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644 (file)
index 0000000..97d9120
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleMean *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+  Tensor *output = helper.getOutputTensor(node);
+
+  auto temp_index_unique =
+    std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+  temp_index_unique->set_observable(false);
+  temp_index_unique->set_data_buffer(nullptr);
+  Tensor *temp_index =
+    helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+  auto resolved_axes_unique =
+    std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+  resolved_axes_unique->set_observable(false);
+  resolved_axes_unique->set_data_buffer(nullptr);
+  Tensor *resolved_axes =
+    helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+  auto temp_sum_unique =
+    std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+  temp_sum_unique->set_observable(false);
+  temp_sum_unique->set_data_buffer(nullptr);
+  Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
+
+  ReducerParams params{};
+  params.keep_dims = node->keep_dims();
+
+  return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
+                                         params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644 (file)
index 0000000..ff65952
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleMinimum *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644 (file)
index 0000000..ebf2945
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleMirrorPad *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *paddings = helper.getInputTensor(node->paddings());
+  Tensor *output = helper.getOutputTensor(node);
+
+  MirrorPadParams params{};
+  params.mode = node->mode();
+
+  return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644 (file)
index 0000000..4f9da96
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleMul *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  MulParams params{};
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644 (file)
index 0000000..23c0053
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleNeg *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644 (file)
index 0000000..8e5711f
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleNotEqual *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644 (file)
index 0000000..e31601b
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CirclePRelu *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *alpha = helper.getInputTensor(node->alpha());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644 (file)
index 0000000..6994720
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CirclePack *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == node->values_count());
+
+  std::vector<const Tensor *> inputs(node->values_count());
+  for (uint32_t i = 0; i < node->values_count(); ++i)
+  {
+    inputs[i] = helper.getInputTensor(node->values(i));
+  }
+  Tensor *output = helper.getOutputTensor(node);
+
+  PackParams params{};
+  params.axis = node->axis();
+  params.values_count = node->values_count();
+
+  return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644 (file)
index 0000000..7705492
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CirclePad *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *paddings = helper.getInputTensor(node->paddings());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644 (file)
index 0000000..12deb15
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CirclePadV2 *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *paddings = helper.getInputTensor(node->paddings());
+  const Tensor *constant_values = helper.getInputTensor(node->constant_values());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644 (file)
index 0000000..b430bc9
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CirclePow *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644 (file)
index 0000000..d53a66a
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleRelu *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->features());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644 (file)
index 0000000..f1b5d21
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleRelu6 *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->features());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644 (file)
index 0000000..89e3ece
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleReshape *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->tensor());
+  const Tensor *shape = helper.getInputTensor(node->shape());
+  Tensor *output = helper.getOutputTensor(node);
+
+  // NOTE 'newShape' attribute is ignored.
+  return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644 (file)
index 0000000..dca5658
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+                                                          KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleResizeBilinear *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *size = helper.getInputTensor(node->size());
+  Tensor *output = helper.getOutputTensor(node);
+
+  ResizeBilinearParams params{};
+  params.align_corners = node->align_corners();
+  params.half_pixel_centers = node->half_pixel_centers();
+
+  return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644 (file)
index 0000000..d1ea19c
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+                                         KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *size = helper.getInputTensor(node->size());
+  Tensor *output = helper.getOutputTensor(node);
+
+  ResizeNearestNeighborParams params{};
+  params.align_corners = node->align_corners();
+  // TODO Update half_pixel_centers after CircleResizeNearestNeighbor is updated.
+  // The current CircleResizeNearestNeighbor does not have half_pixel_centers,
+  // so the default value used here is false.
+  // This needs to be updated when CircleResizeNearestNeighbor is updated.
+  params.half_pixel_centers = false;
+
+  return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644 (file)
index 0000000..ea00f54
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleReverseV2 *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->tensor());
+  const Tensor *axes = helper.getInputTensor(node->axis());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::ReverseV2>(input, axes, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644 (file)
index 0000000..ff87f43
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleRsqrt *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644 (file)
index 0000000..741cd08
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSlice *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *begin = helper.getInputTensor(node->begin());
+  const Tensor *size = helper.getInputTensor(node->size());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644 (file)
index 0000000..b15e4b6
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSoftmax *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->logits());
+  Tensor *output = helper.getOutputTensor(node);
+
+  SoftmaxParams params{};
+  params.beta = node->beta();
+
+  return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644 (file)
index 0000000..91c237a
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+                                                          KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 3);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+  const Tensor *paddings = helper.getInputTensor(node->paddings());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644 (file)
index 0000000..3cbbd97
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+                                                        KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSpaceToDepth *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+  const Tensor *input = helper.getInputTensor(node->input());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  SpaceToDepthParams params{};
+  params.block_size = node->block_size();
+
+  return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644 (file)
index 0000000..32553ad
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSplit *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+  assert(node->arity() == 2);
+  assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+  const Tensor *axis = helper.getInputTensor(node->split_dim());
+  const Tensor *input = helper.getInputTensor(node->input());
+  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+  // NOTE 'num_splits' attribute is ignored.
+  return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644 (file)
index 0000000..d788164
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSplitV *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
+  assert(node->arity() == 3);
+  assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
+  const Tensor *axis = helper.getInputTensor(node->split_dim());
+  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+  // NOTE 'num_splits' attribute is ignored.
+  return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644 (file)
index 0000000..56dd986
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSqrt *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644 (file)
index 0000000..43aadb9
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSquare *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644 (file)
index 0000000..6a2717a
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
+                                                             KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSquaredDifference *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644 (file)
index 0000000..583ff93
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
+                                                   KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSqueeze *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  Tensor *output = helper.getOutputTensor(node);
+
+  SqueezeParams params{};
+  params.squeeze_dims = node->squeeze_dims();
+
+  return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644 (file)
index 0000000..fe5fa77
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
+                                                        KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleStridedSlice *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 4);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  const Tensor *begin = helper.getInputTensor(node->begin());
+  const Tensor *end = helper.getInputTensor(node->end());
+  const Tensor *strides = helper.getInputTensor(node->strides());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  StridedSliceParams params{};
+  params.begin_mask = node->begin_mask();
+  params.ellipsis_mask = node->ellipsis_mask();
+  params.end_mask = node->end_mask();
+  params.new_axis_mask = node->new_axis_mask();
+  params.shrink_axis_mask = node->shrink_axis_mask();
+
+  return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644 (file)
index 0000000..bad4fbb
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
+                                               KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleSub *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input1 = helper.getInputTensor(node->x());
+  const Tensor *input2 = helper.getInputTensor(node->y());
+  Tensor *output = helper.getOutputTensor(node);
+
+  SubParams params{};
+  params.activation = node->fusedActivationFunction();
+
+  return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644 (file)
index 0000000..f425529
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleTanh *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->x());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644 (file)
index 0000000..4e095fb
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
+                                                     KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleTranspose *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *input = helper.getInputTensor(node->a());
+  const Tensor *perm = helper.getInputTensor(node->perm());
+  Tensor *output = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644 (file)
index 0000000..1b954c3
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
+                                                         KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleTransposeConv *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 4);
+
+  const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
+  const Tensor *filter = helper.getInputTensor(node->filter());
+  const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
+  const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+
+  Tensor *output = helper.getOutputTensor(node);
+
+  DataType scratch_data_type =
+    helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+  auto scratch_tensor =
+    std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
+  scratch_tensor->set_observable(false);
+  scratch_tensor->set_data_buffer(nullptr);
+  Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
+
+  TransposeConvParams params{};
+  params.padding = node->padding();
+  params.stride_height = node->stride()->h();
+  params.stride_width = node->stride()->w();
+
+  return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+                                                  tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644 (file)
index 0000000..978c738
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleUnpack *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+  auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+  assert(node->arity() == 1);
+  assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+  const Tensor *input = helper.getInputTensor(node->value());
+  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+  UnpackParams params{};
+  params.axis = node->axis();
+
+  // NOTE 'num' attribute is ignored.
+  return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644 (file)
index 0000000..284dc0c
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
+                                                 KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleWhile *>(circle_node);
+  if (node == nullptr)
+    throw std::runtime_error("wrong builder for operation");
+
+  auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
+  assert(node->arity() == node->input_count());
+  assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+  std::vector<const Tensor *> inputs(node->input_count());
+  for (uint32_t i = 0; i < node->input_count(); ++i)
+  {
+    inputs[i] = helper.getInputTensor(node->input(i));
+  }
+  std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+  RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
+  RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
+
+  return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
+                                          body_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/CMakeLists.txt b/compiler/luci-micro/CMakeLists.txt
new file mode 100644 (file)
index 0000000..d936e12
--- /dev/null
@@ -0,0 +1,57 @@
+set(ARM_C_COMPILER "arm-none-eabi-gcc")
+set(ARM_ASM_COMPILER "arm-none-eabi-gcc")
+set(ARM_CXX_COMPILER "arm-none-eabi-g++")
+set(ARM_OBJCOPY "arm-none-eabi-objcopy")
+
+find_program(ARM_C_COMPILER_PATH ${ARM_C_COMPILER})
+
+if(NOT ARM_C_COMPILER_PATH)
+  message(WARNING "ARM compiler is NOT FOUND, skipping luci-micro build")
+  return()
+endif()
+
+set(CMAKE_ARM_OPTIONS
+  -DLUCI_INTERPRETER_STATIC=ON
+  -DLUCI_STATIC=ON
+  "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/standalone/Toolchain.cmake"
+  "-DLUCI_INTERPRETER_PAL_DIR=${CMAKE_CURRENT_SOURCE_DIR}/../luci-interpreter/pal/mcu"
+  "-DNNAS_PROJECT_SOURCE_DIR=${NNAS_PROJECT_SOURCE_DIR}"
+  "-DNNAS_EXTERNALS_DIR=${NNAS_EXTERNALS_DIR}"
+  -DCPU_ARCH=arm
+  -DC_COMPILER=${ARM_C_COMPILER}
+  -DCXX_COMPILER=${ARM_CXX_COMPILER}
+  -DASM_COMPILER=${ARM_ASM_COMPILER}
+  -DOBJCOPY=${ARM_OBJCOPY}
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+  -DENABLE_TEST=OFF
+  -DBUILD_GTEST=OFF
+  "-DNNAS_ROOT=${NNAS_PROJECT_SOURCE_DIR}"
+  -DENABLE_STRICT_BUILD=OFF
+)
+
+set(MICRO_ARM_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/standalone_arm")
+file(MAKE_DIRECTORY "${MICRO_ARM_BUILD_DIR}")
+
+set(MICRO_ARM_BUILD_DEPENDENCY "${MICRO_ARM_BUILD_DIR}/CMakeCache.txt")
+
+add_custom_command(
+  OUTPUT "${MICRO_ARM_BUILD_DEPENDENCY}"
+  COMMAND "${CMAKE_COMMAND}" "${CMAKE_CURRENT_SOURCE_DIR}/standalone" ${CMAKE_ARM_OPTIONS}
+  WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/standalone/CMakeLists.txt"
+  VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm_cmake DEPENDS "${MICRO_ARM_BUILD_DEPENDENCY}")
+
+set(MICRO_ARM_BINARY "${MICRO_ARM_BUILD_DIR}/compiler/luci-interpreter/src/libluci_interpreter.a")
+
+add_custom_command(
+  OUTPUT "${MICRO_ARM_BINARY}"
+  COMMAND "${CMAKE_MAKE_PROGRAM}" luci_interpreter -j ${CPU_COUNT}
+  WORKING_DIRECTORY "${MICRO_ARM_BUILD_DIR}"
+  DEPENDS luci_interpreter_micro_arm_cmake
+  VERBATIM
+)
+
+add_custom_target(luci_interpreter_micro_arm DEPENDS "${MICRO_ARM_BINARY}")
diff --git a/compiler/luci-micro/README.md b/compiler/luci-micro/README.md
new file mode 100644 (file)
index 0000000..6641ad7
--- /dev/null
@@ -0,0 +1,56 @@
+# luci-micro
+
+`luci-micro` is an MCU-specialized build of luci-interpreter with several benchmark applications.
+
+## Contents
+
+Luci-micro contains cmake infrastructure to build:
+- stand-alone interpreter library
+- benchmark applications using luci interpreter on arm MCUs
+
+## How to build stand alone library
+
+Stand-alone library is simply built by `luci_interpreter_micro_arm` target.
+Result library will be placed in `<ONE root>/build/compiler/luci-micro/standalone_arm/luci-interpreter/src/libluci_interpreter.a`.
+
+### Prerequisites
+
+- Everything you need for ONE project: see [how-to-build-compiler.md](../../docs/howto/how-to-build-compiler.md)
+- arm-none-eabi-gcc and arm-none-eabi-g++ compilers
+
+To install needed arm compilers on ubuntu:
+```
+$ sudo apt-get install gcc-arm-none-eabi
+```
+
+**cmake build**
+
+``` bash
+$ cd <path to ONE>
+$ mkdir build
+$ cd build
+$ cmake ../infra/nncc
+$ make -j$(nproc) luci_interpreter_micro_arm
+```
+
+**nncc script build**
+
+``` bash
+$ cd <path to ONE>
+$ ./nncc configure
+$ ./nncc build -j$(nproc) luci_interpreter_micro_arm
+```
+
+### Known issues
+
+Interpreter uses TensorFlow headers that produce warnings.
+
+`Linux` x86 build uses "-isystem" flag to suppress warnings from external sources,
+but some old arm compilers have issues with it:
+[bug](https://bugs.launchpad.net/gcc-arm-embedded/+bug/1698539)
+
+`-isystem` hack is disabled for MCU build, because of this MCU build is broken if `-Werror` flag is set.
+
+## How to use
+
+TBD
diff --git a/compiler/luci-micro/requires.cmake b/compiler/luci-micro/requires.cmake
new file mode 100644 (file)
index 0000000..5913aa9
--- /dev/null
@@ -0,0 +1 @@
+require(luci-interpreter)
diff --git a/compiler/luci-micro/standalone/CMakeLists.txt b/compiler/luci-micro/standalone/CMakeLists.txt
new file mode 100644 (file)
index 0000000..7953359
--- /dev/null
@@ -0,0 +1,20 @@
+cmake_minimum_required(VERSION 3.10)
+project(luci_interpreter_micro_standalone)
+
+# Add fake target, so nothing is built
+set(BUILD_WHITELIST "dummy")
+
+add_subdirectory(${NNAS_ROOT}/infra/nncc ${CMAKE_CURRENT_BINARY_DIR}/nncc)
+
+set(ONE_COMPILER_SRC_DIR "${NNAS_PROJECT_SOURCE_DIR}/compiler")
+
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/loco ${CMAKE_CURRENT_BINARY_DIR}/loco)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/angkor ${CMAKE_CURRENT_BINARY_DIR}/angkor)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/oops ${CMAKE_CURRENT_BINARY_DIR}/oops)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/pepper-str ${CMAKE_CURRENT_BINARY_DIR}/pepper-str)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo ${CMAKE_CURRENT_BINARY_DIR}/logo)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/logo-core ${CMAKE_CURRENT_BINARY_DIR}/logo-core)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/locomotiv ${CMAKE_CURRENT_BINARY_DIR}/locomotiv)
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci/lang ${CMAKE_CURRENT_BINARY_DIR}/luci/lang)
+
+add_subdirectory(${ONE_COMPILER_SRC_DIR}/luci-interpreter ${CMAKE_CURRENT_BINARY_DIR}/luci-interpreter)
diff --git a/compiler/luci-micro/standalone/Toolchain.cmake b/compiler/luci-micro/standalone/Toolchain.cmake
new file mode 100644 (file)
index 0000000..2d23b5d
--- /dev/null
@@ -0,0 +1,8 @@
+set(CMAKE_SYSTEM_NAME Generic)
+
+set(CMAKE_SYSTEM_PROCESSOR "${CPU_ARCH}")
+set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+set(CMAKE_C_COMPILER "${C_COMPILER}")
+set(CMAKE_CXX_COMPILER "${CXX_COMPILER}")
+set(CMAKE_ASM_COMPILER "${ASM_COMPILER}")
+set(CMAKE_OBJCOPY "${OBJCOPY}")
index 2d2befe..b314158 100644 (file)
@@ -38,7 +38,7 @@ add_test(NAME luci_pass_value_test
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/eval_driver.sh"
           "${CMAKE_CURRENT_BINARY_DIR}"
           "${ARTIFACTS_BIN_PATH}"
-          "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+          "${NNCC_OVERLAY_DIR}/venv_2_6_0"
           "$<TARGET_FILE:luci_eval_driver>"
           ${LUCI_PASS_VALUE_TESTS}
 )
index 124f120..3c7185b 100644 (file)
@@ -18,7 +18,7 @@ add_test(NAME luci_value_test
   COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/evalverify.sh"
           "${CMAKE_CURRENT_BINARY_DIR}"
           "${ARTIFACTS_BIN_PATH}"
-          "${NNCC_OVERLAY_DIR}/venv_2_3_0"
+          "${NNCC_OVERLAY_DIR}/venv_2_6_0"
           "$<TARGET_FILE:luci_eval_driver>"
           ${LUCI_VALUE_TESTS}
 )
index 90e9283..6f1d0d5 100644 (file)
@@ -5,11 +5,15 @@
 The test proceeds as follows
 
 Step 1: Generate tflite files and circle files from TFLite recipes (listed in test.lst).
+```
 "TFLite recipe" -> tflchef -> "tflite file" -> tflite2circle -> "circle file"
+```
 
 Step 2: Run TFLite interpreter and luci-interpreter for the generated tflite and circle, respectively.
 (with the same input tensors filled with random values)
+```
 circle file -> luci-interpreter -------> Execution result 1
 tflite file -> TFLite interpreter -----> Execution result 2
+```
 
 Step 3: Compare the execution result 1 and 2. The result must be the same.
index f6b0620..a76bd14 100755 (executable)
@@ -64,41 +64,23 @@ for idx in range(len(interpreter.get_output_details())):
     shape_file = open(circle_model + ".output" + str(idx) + ".shape", 'r')
     output_shape = [int(i) for i in shape_file.read().split(',')]
     luci_output_data = np.reshape(output_data, output_shape)
+    intp_output_data = interpreter.get_tensor(output_details["index"])
     try:
         if output_details["dtype"] == np.uint8:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         elif output_details["dtype"] == np.float32:
             if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=1.e-5,
-                    atol=1.e-5) == False:
+                    luci_output_data, intp_output_data, rtol=1.e-5, atol=1.e-5) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         elif output_details["dtype"] == np.int64:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         elif output_details["dtype"] == np.int32:
-            if np.allclose(
-                    luci_output_data,
-                    interpreter.get_tensor(
-                        interpreter.get_output_details()[idx]["index"]),
-                    rtol=0,
-                    atol=0) == False:
+            if np.allclose(luci_output_data, intp_output_data, rtol=0, atol=0) == False:
                 raise SystemExit("Execution result of " + tflite_model +
                                  " does not match with " + circle_model)
         else:
index 95c349c..9dcf1b5 100644 (file)
@@ -1,3 +1,14 @@
+# Some targets do not support dynamic linking: MCU, TrustZone applications, etc.
+# STATIC_LUCI option allows us to compile luci and luci related components safely
+# and suppress various cmake warnings.
+#
+# Currently this feature is used for luci-interpreter MCU builds.
+if (STATIC_LUCI)
+  set(LIBRARY_TYPE "STATIC")
+else()
+  set(LIBRARY_TYPE "SHARED")
+endif()
+
 add_subdirectory(env)
 add_subdirectory(log)
 add_subdirectory(lang)
@@ -6,6 +17,7 @@ add_subdirectory(testhelper)
 add_subdirectory(service)
 add_subdirectory(pass)
 add_subdirectory(profile)
+add_subdirectory(plan)
 add_subdirectory(partition)
 add_subdirectory(import)
 add_subdirectory(export)
index 4d1a89a..bba5155 100644 (file)
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_env SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_env ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_env PUBLIC include)
 target_link_libraries(luci_env PRIVATE nncc_common)
 install(TARGETS luci_env DESTINATION lib)
index b56bd65..9fe9592 100644 (file)
@@ -33,6 +33,7 @@ struct UserSettings
     MuteWarnings,
     DisableValidation,
     ProfilingDataGen,
+    ExecutionPlanGen,
   };
 
   static UserSettings *settings();
index b4c6611..136fee7 100644 (file)
@@ -31,6 +31,7 @@ private:
   bool _MuteWarnings{false};
   bool _DisableValidation{false};
   bool _ProfilingDataGen{false};
+  bool _ExecutionPlanGen{false};
 };
 
 void UserSettingsImpl::set(const Key key, bool value)
@@ -46,6 +47,9 @@ void UserSettingsImpl::set(const Key key, bool value)
     case Key::ProfilingDataGen:
       _ProfilingDataGen = value;
       break;
+    case Key::ExecutionPlanGen:
+      _ExecutionPlanGen = value;
+      break;
     default:
       throw std::runtime_error("Invalid key in boolean set");
       break;
@@ -62,6 +66,8 @@ bool UserSettingsImpl::get(const Key key) const
       return _DisableValidation;
     case Key::ProfilingDataGen:
       return _ProfilingDataGen;
+    case Key::ExecutionPlanGen:
+      return _ExecutionPlanGen;
     default:
       throw std::runtime_error("Invalid key in boolean get");
       break;
index 899c0c2..26c606e 100644 (file)
@@ -39,6 +39,18 @@ TEST(UserSettings, MuteWarnings)
   ASSERT_TRUE(settings->get(luci::UserSettings::Key::MuteWarnings));
 }
 
+TEST(UserSettings, MuteWarnings_NEG)
+{
+  auto settings = luci::UserSettings::settings();
+  ASSERT_NE(nullptr, settings);
+
+  settings->set(luci::UserSettings::Key::MuteWarnings, false);
+  ASSERT_FALSE(settings->get(luci::UserSettings::Key::MuteWarnings));
+
+  settings->set(luci::UserSettings::Key::MuteWarnings, true);
+  ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation));
+}
+
 TEST(UserSettings, DisableValidation)
 {
   auto settings = luci::UserSettings::settings();
@@ -51,6 +63,18 @@ TEST(UserSettings, DisableValidation)
   ASSERT_TRUE(settings->get(luci::UserSettings::Key::DisableValidation));
 }
 
+TEST(UserSettings, DisableValidation_NEG)
+{
+  auto settings = luci::UserSettings::settings();
+  ASSERT_NE(nullptr, settings);
+
+  settings->set(luci::UserSettings::Key::DisableValidation, false);
+  ASSERT_FALSE(settings->get(luci::UserSettings::Key::DisableValidation));
+
+  settings->set(luci::UserSettings::Key::DisableValidation, true);
+  ASSERT_FALSE(settings->get(luci::UserSettings::Key::ProfilingDataGen));
+}
+
 TEST(UserSettings, ProfilingDataGen)
 {
   auto settings = luci::UserSettings::settings();
index 5c00776..2b41a62 100644 (file)
@@ -3,7 +3,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 #file(GLOB_RECURSE TESTS "src/*.test.cpp")
 #list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_export SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+    set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_export ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_export PRIVATE src)
 target_include_directories(luci_export PUBLIC include)
 target_link_libraries(luci_export PRIVATE luci_lang)
@@ -14,6 +18,7 @@ target_link_libraries(luci_export PRIVATE luci_env)
 target_link_libraries(luci_export PRIVATE luci_log)
 target_link_libraries(luci_export PRIVATE luci_logex)
 target_link_libraries(luci_export PRIVATE luci_profile)
+target_link_libraries(luci_export PRIVATE luci_plan)
 target_link_libraries(luci_export PRIVATE nncc_common)
 target_link_libraries(luci_export PRIVATE locop)
 target_link_libraries(luci_export PRIVATE oops)
index ef905a8..017002f 100644 (file)
@@ -44,6 +44,31 @@ flatbuffers::Offset<circle::Metadata> metadata_offset(flatbuffers::FlatBufferBui
 namespace luci
 {
 
+// 'execution_plan_table' is encoded to binary format.
+const std::vector<uint8_t> CircleExportMetadata::encoded_execution_plan_table()
+{
+  std::vector<uint8_t> data;
+
+  write_u32(data, _execution_plan_table.size());
+
+  for (auto &kv : _execution_plan_table)
+  {
+    const auto id = kv.first;
+    write_u32(data, id);
+
+    const auto plan_vector = kv.second;
+    const auto size = plan_vector.size();
+    write_u32(data, size);
+
+    for (auto elem : plan_vector)
+    {
+      write_u32(data, elem);
+    }
+  }
+
+  return data;
+}
+
 // 'source_table' is encoded to binary format.
 const std::vector<uint8_t> CircleExportMetadata::encoded_source_table(void)
 {
@@ -114,7 +139,11 @@ createCircleMetadataVector(flatbuffers::FlatBufferBuilder &builder, luci::Serial
     metadata_vec.emplace_back(
       metadata_offset(builder, md, md._metadata.encoded_op_table(), "ONE_op_table"));
   }
-
+  if (settings->get(luci::UserSettings::Key::ExecutionPlanGen))
+  {
+    metadata_vec.emplace_back(metadata_offset(
+      builder, md, md._metadata.encoded_execution_plan_table(), "ONE_execution_plan_table"));
+  }
   return metadata_vec;
 }
 
index 014d9bd..be64a52 100644 (file)
@@ -22,6 +22,7 @@
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/CircleNodeVisitor.h>
 #include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 #include <luci/UserSettings.h>
 #include <luci/Log.h>
 
@@ -1684,7 +1685,7 @@ void OpExporterLet<OE::CIRC>::visit(luci::CircleInstanceNorm *node)
 }
 
 void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, SerializedModelData &md,
-                SerializedGraphData &gd)
+                SerializedGraphData &gd, uint32_t node_position)
 {
   if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
   {
@@ -1702,6 +1703,19 @@ void exportNode(loco::Node *node, flatbuffers::FlatBufferBuilder &builder, Seria
         md._metadata.add_op_table(node_id, source->id());
       }
     }
+    if (has_execution_plan(circle_node))
+    {
+      // Annotate the node (at node_position) in the metadata vector with its execution plan
+      // information: the order of execution, followed by the offsets of its output tensors.
+      const auto execution_plan = get_execution_plan(circle_node);
+      std::vector<uint32_t> execution_plan_vector;
+      execution_plan_vector.push_back(execution_plan.order_in_plan());
+      for (auto offset : execution_plan.offsets())
+      {
+        execution_plan_vector.push_back(offset);
+      }
+      md._metadata.add_execution_plan_table(node_position, execution_plan_vector);
+    }
   }
   else
   {
@@ -1717,9 +1731,11 @@ namespace luci
 void exportNodes(loco::Graph *g, FlatBufferBuilder &builder, SerializedModelData &md,
                  SerializedGraphData &gd)
 {
+  uint32_t node_position = 0;
   for (auto node : loco::postorder_traversal(loco::output_nodes(g)))
   {
-    exportNode(node, builder, md, gd);
+    exportNode(node, builder, md, gd, node_position);
+    node_position++;
   }
 }
 
index 95f7b57..a945eec 100644 (file)
@@ -20,6 +20,7 @@
 #include <mio/circle/schema_generated.h>
 
 #include <luci/IR/CircleNodes.h>
+#include <luci/IR/ExecutionPlanTable.h>
 
 #include <vector>
 
@@ -63,13 +64,23 @@ public:
     _op_table.at(node_id).emplace(source_id);
   }
 
+  void add_execution_plan_table(uint32_t node_id,
+                                const std::vector<uint32_t> &execution_plan_inform)
+  {
+    _execution_plan_table[node_id] = execution_plan_inform;
+  }
+
 public:
   const std::vector<uint8_t> encoded_source_table(void);
   const std::vector<uint8_t> encoded_op_table(void);
+  const std::vector<uint8_t> encoded_execution_plan_table(void);
 
 private:
   std::map<uint32_t, std::string> _source_table;
   std::map<uint32_t, std::set<uint32_t>> _op_table;
+  // _execution_plan_table stores, for the node with each node_id, its order of execution
+  // followed by the memory offsets of the node's output tensors.
+  luci::ExecutionPlanTable _execution_plan_table;
 };
 
 } // namespace luci
index 4e200f6..1df569d 100644 (file)
@@ -2,11 +2,16 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_import SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_import ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_import PRIVATE src)
 target_include_directories(luci_import PUBLIC include)
 target_link_libraries(luci_import PUBLIC luci_lang)
 target_link_libraries(luci_import PUBLIC luci_profile)
+target_link_libraries(luci_import PUBLIC luci_plan)
 target_link_libraries(luci_import PUBLIC mio_circle)
 target_link_libraries(luci_import PRIVATE luci_env)
 target_link_libraries(luci_import PRIVATE luci_log)
index f68f330..42dcebd 100644 (file)
@@ -134,6 +134,55 @@ decoded_op_table(const std::vector<uint8_t> &op_table_data)
   return node_source_ids_map;
 }
 
+// 'execution_plan_table' is decoded to std::map<uint32_t, std::vector<uint32_t>> format.
+const luci::ExecutionPlanTable
+decoded_execution_plan(const std::vector<uint8_t> &execution_plan_data)
+{
+  luci::ExecutionPlanTable execution_plan_table;
+  uint32_t idx = 0;
+
+  if (execution_plan_data.size() < 4)
+    throw std::runtime_error("Op table decode error : invalid entry number");
+
+  uint32_t entry_number = read_u32(execution_plan_data, idx);
+  idx += sizeof(uint32_t);
+
+  while (idx < execution_plan_data.size())
+  {
+    if (idx + 2 * sizeof(uint32_t) > execution_plan_data.size())
+      throw std::runtime_error("Op table decode error : invalid entry item");
+
+    uint32_t id = read_u32(execution_plan_data, idx);
+    idx += sizeof(uint32_t);
+
+    uint32_t size = read_u32(execution_plan_data, idx);
+    idx += sizeof(uint32_t);
+
+    if (idx + sizeof(uint32_t) * size > execution_plan_data.size())
+      throw std::runtime_error("Source table decode error : invalid entry data");
+
+    std::vector<uint32_t> execution_plan_vector;
+    for (uint32_t j = 0; j < size; ++j)
+    {
+      uint32_t execution_plan_inform = read_u32(execution_plan_data, idx);
+      idx += sizeof(uint32_t);
+
+      execution_plan_vector.push_back(execution_plan_inform);
+    }
+
+    if (execution_plan_table.insert({id, execution_plan_vector}).second == false)
+      throw std::runtime_error("Op table decode error : duplicated origin ID");
+  }
+
+  if (idx != execution_plan_data.size())
+    throw std::runtime_error("Op table decode error : data size invalid");
+
+  if (execution_plan_table.size() != entry_number)
+    throw std::runtime_error("Op table decode error : entry number invalid");
+
+  return execution_plan_table;
+}
+
 } // namespace
 
 namespace luci
@@ -153,6 +202,8 @@ CircleImportMetadata::CircleImportMetadata(const luci::CircleReader &reader)
       _op_table = decoded_op_table(buffer);
     else if (meta.name.compare("ONE_source_table") == 0)
       _source_table = decoded_source_table(buffer);
+    else if (meta.name.compare("ONE_execution_plan_table") == 0)
+      _execution_plan_table = decoded_execution_plan(buffer);
   }
 }
 
index 007985d..0e02406 100644 (file)
@@ -20,6 +20,7 @@
 #include "luci/Import/CircleReader.h"
 
 #include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/IR/ExecutionPlanTable.h>
 
 #include <map>
 #include <set>
@@ -47,10 +48,15 @@ public:
 
   const std::map<uint32_t, std::string> &source_table(void) const { return _source_table; }
 
+  const luci::ExecutionPlanTable &execution_plan_table(void) const { return _execution_plan_table; }
+
 private:
   // Decoded metadata is stored
   std::map<uint32_t, std::string> _source_table;
   std::map<uint32_t, std::set<uint32_t>> _op_table;
+  // _execution_plan_table stores, for the node with each node_id, its order of execution
+  // and the memory offsets of the node's output tensors.
+  luci::ExecutionPlanTable _execution_plan_table;
 };
 
 } // namespace luci
index 68baefa..8eae5fc 100644 (file)
@@ -28,6 +28,7 @@
 #include <luci/IR/CircleNodes.h>
 #include <luci/Profile/CircleNodeID.h>
 #include <luci/Profile/CircleNodeOrigin.h>
+#include <luci/Plan/CircleNodeExecutionPlan.h>
 #include <luci/Log.h>
 #include <luci/LogHelper.h>
 
@@ -344,6 +345,25 @@ std::unique_ptr<Module> Importer::importModule(const circle::Model *model) const
     module->source_table(table);
   }
 
+  // Add execution_plan annotations
+  if (circle_metadata->execution_plan_table().size() > 0)
+  {
+    auto execution_plan_table = circle_metadata->execution_plan_table();
+    auto node_position = 0;
+    for (auto node : loco::postorder_traversal(loco::output_nodes(module->graph())))
+    {
+      if (auto circle_node = dynamic_cast<luci::CircleNode *>(node))
+      {
+        auto node_plan = execution_plan_table[node_position];
+        luci::add_execution_plan(
+          circle_node,
+          luci::CircleNodeExecutionPlan(
+            node_plan[0], std::vector<uint32_t>(node_plan.begin() + 1, node_plan.end())));
+      }
+      node_position++;
+    }
+  }
+
   return module;
 }
 
index 669a866..433b7cd 100644 (file)
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_lang SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_lang ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_lang PRIVATE src)
 target_include_directories(luci_lang PUBLIC include)
 target_link_libraries(luci_lang PUBLIC loco)
diff --git a/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h b/compiler/luci/lang/include/luci/IR/ExecutionPlanTable.h
new file mode 100644 (file)
index 0000000..5c33c11
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_EXECUTION_PLAN_TABLE_H__
+#define __LUCI_EXECUTION_PLAN_TABLE_H__
+
+namespace luci
+{
+
+using ExecutionPlanTable = std::map<uint32_t, std::vector<uint32_t>>;
+
+} // namespace luci
+
+#endif // __LUCI_EXECUTION_PLAN_TABLE_H__
index 23bd008..b64a065 100644 (file)
@@ -1,7 +1,11 @@
 # TODO Find how to test logging framework
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
-add_library(luci_log SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+    set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_log ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_log PUBLIC include)
 target_link_libraries(luci_log PUBLIC hermes)
 target_link_libraries(luci_log PRIVATE hermes_std)
index cd2571b..4d801b0 100644 (file)
@@ -1,7 +1,11 @@
 # TODO Find how to test logging-ex utility
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 
-add_library(luci_logex SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+    set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_logex ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_logex PUBLIC include)
 target_link_libraries(luci_logex PUBLIC loco)
 target_link_libraries(luci_logex PUBLIC locop)
index 236b689..eacbe1c 100644 (file)
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_partition SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_partition ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_partition PRIVATE src)
 target_include_directories(luci_partition PUBLIC include)
 target_link_libraries(luci_partition PUBLIC luci_lang)
index fd06c6d..2361bb4 100644 (file)
@@ -1,8 +1,18 @@
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
+if(NOT FlatBuffers_FOUND)
+  message(STATUS "FlatBuffers NOT FOUND")
+  return()
+endif(NOT FlatBuffers_FOUND)
+
 file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_pass SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_pass ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_pass PRIVATE src)
 target_include_directories(luci_pass PUBLIC include)
 target_link_libraries(luci_pass PUBLIC loco)
@@ -13,9 +23,11 @@ target_link_libraries(luci_pass PRIVATE luci_log)
 target_link_libraries(luci_pass PRIVATE luci_service)
 target_link_libraries(luci_pass PRIVATE luci_logex)
 target_link_libraries(luci_pass PRIVATE luci_profile)
+target_link_libraries(luci_pass PRIVATE mio_tflite260_inc)
 target_link_libraries(luci_pass PRIVATE nncc_common)
 target_link_libraries(luci_pass PRIVATE pepper_csv2vec)
 target_link_libraries(luci_pass PRIVATE oops)
+target_link_libraries(luci_pass PRIVATE flatbuffers-1.12)
 install(TARGETS luci_pass DESTINATION lib)
 install(DIRECTORY include/ DESTINATION include
         FILES_MATCHING PATTERN "*.h")
@@ -31,4 +43,5 @@ target_include_directories(luci_pass_test PRIVATE src)
 target_link_libraries(luci_pass_test luci_pass)
 target_link_libraries(luci_pass_test luci_lang)
 target_link_libraries(luci_pass_test luci_testhelper)
+target_link_libraries(luci_pass_test flatbuffers-1.12)
 #target_link_libraries(luci_pass_test oops)
index 3bcc7c5..917caca 100644 (file)
@@ -34,6 +34,7 @@ public:
   {
     enum Algorithm
     {
+      FuseAddWithFullyConnected,
       FuseAddWithTConv,
       FuseBatchNormWithConv,
       FuseBatchNormWithDwConv,
@@ -51,8 +52,10 @@ public:
       Requantize,
       FoldAddV2,
       FoldCast,
+      FoldDepthwiseConv2D,
       FoldDequantize,
       FoldSparseToDense,
+      ForceQuantParam,
       ForwardReshapeToUnaryOp,
       SparsifyTensorPass,
       FusePreActivationBatchNorm,
@@ -64,7 +67,9 @@ public:
       ReplaceSubWithAdd,
       SubstitutePackToReshape,
       SubstitutePadV2ToPad,
+      SubstituteSplitVToSplit,
       SubstituteSqueezeToReshape,
+      ExpandBroadcastConst,
       ConvertNCHWToNHWC,
       RemoveUnnecessarySlice,
       RemoveUnnecessaryStridedSlice,
@@ -82,9 +87,12 @@ public:
     enum AlgorithmParameters
     {
       // quantize
-      Quantize_input_dtype,
-      Quantize_output_dtype,
+      Quantize_input_model_dtype,
+      Quantize_output_model_dtype,
       Quantize_granularity, // layer-wise or channel-wise
+      Quantize_tensor_names,
+      Quantize_scales,
+      Quantize_zero_points,
 
       // sparsify
       Sparsify_tensor_name,
@@ -96,6 +104,9 @@ public:
       // convert NCHW to NHWC
       NCHW_to_NHWC_input_shape,
       NCHW_to_NHWC_output_shape,
+
+      Quantize_input_dtype = Quantize_input_model_dtype,   // TODO Remove this
+      Quantize_output_dtype = Quantize_output_model_dtype, // TODO Remove this
     };
 
     virtual ~Options() = default;
@@ -104,6 +115,8 @@ public:
     virtual bool query(Algorithm) = 0;
     virtual void param(AlgorithmParameters, const std::string &) = 0;
     virtual const std::string param(AlgorithmParameters) const = 0;
+    virtual void params(AlgorithmParameters, std::vector<std::string> &) = 0;
+    virtual std::vector<std::string> params(AlgorithmParameters) const = 0;
   };
 
 public:
diff --git a/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h b/compiler/luci/pass/include/luci/Pass/ExpandBroadcastConstPass.h
new file mode 100644 (file)
index 0000000..5ee26b4
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
+#define __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to remove broadcasts of Const nodes.
+ */
+struct ExpandBroadcastConstPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::ExpandBroadcastConstPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_EXPAND_BROADCAST_CONST_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h b/compiler/luci/pass/include/luci/Pass/FoldDepthwiseConv2DPass.h
new file mode 100644 (file)
index 0000000..58e5b71
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
+#define __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fold DepthwiseConv2D with constant input and filter into a
+ * constant tensor
+ */
+struct FoldDepthwiseConv2DPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FoldDepthwiseConv2DPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FOLD_DEPTHWISE_CONV_2D_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h b/compiler/luci/pass/include/luci/Pass/ForceQuantParamPass.h
new file mode 100644 (file)
index 0000000..752ce1d
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FORCE_QUANT_PARAM_PASS_H__
+#define __LUCI_FORCE_QUANT_PARAM_PASS_H__
+
+#include <loco.h>
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief Pass to write quantparam (scale, zerop) to the specified tensors
+ */
+class ForceQuantParamPass : public logo::Pass
+{
+public:
+  using TensorVector = std::vector<std::string>;
+  using ScaleVector = std::vector<float>;
+  using ZPVector = std::vector<int64_t>;
+
+public:
+  ForceQuantParamPass(TensorVector &tensors, ScaleVector &scales, ZPVector &zerops)
+    : _tensors{tensors}, _scales{scales}, _zerops{zerops}
+  {
+    // DO NOTHING
+  }
+  virtual const char *name(void) const { return "luci::ForceQuantParamPass"; }
+
+public:
+  bool run(loco::Graph *graph);
+
+private:
+  TensorVector _tensors;
+  ScaleVector _scales;
+  ZPVector _zerops;
+};
+
+} // namespace luci
+
+#endif //__LUCI_FORCE_QUANT_PARAM_PASS_H__
diff --git a/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h b/compiler/luci/pass/include/luci/Pass/FuseAddWithFullyConnectedPass.h
new file mode 100644 (file)
index 0000000..a59b644
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
+#define __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to fuse Add into FullyConnected
+ */
+struct FuseAddWithFullyConnectedPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::FuseAddWithFullyConnectedPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_FUSE_ADD_WITH_FULLY_CONNECTED_PASS_H__
index 78e7323..68765ec 100644 (file)
@@ -32,9 +32,10 @@ namespace luci
 class QuantizeDequantizeWeightsPass : public logo::Pass
 {
 public:
-  QuantizeDequantizeWeightsPass(loco::DataType input_dtype, loco::DataType output_dtype,
+  QuantizeDequantizeWeightsPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
                                 QuantizationGranularity granularity)
-    : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
+                                                                                        granularity}
   {
     // DO NOTHING
   }
@@ -44,8 +45,8 @@ public:
   bool run(loco::Graph *graph);
 
 private:
-  loco::DataType _input_dtype;
-  loco::DataType _output_dtype;
+  loco::DataType _input_model_dtype;
+  loco::DataType _output_model_dtype;
   QuantizationGranularity _granularity;
 };
 
index 9520910..d618a07 100644 (file)
@@ -32,9 +32,10 @@ namespace luci
 class QuantizeWithMinMaxPass : public logo::Pass
 {
 public:
-  QuantizeWithMinMaxPass(loco::DataType input_dtype, loco::DataType output_dtype,
+  QuantizeWithMinMaxPass(loco::DataType input_model_dtype, loco::DataType output_model_dtype,
                          QuantizationGranularity granularity)
-    : _input_dtype{input_dtype}, _output_dtype{output_dtype}, _granularity{granularity}
+    : _input_model_dtype{input_model_dtype}, _output_model_dtype{output_model_dtype}, _granularity{
+                                                                                        granularity}
   {
     // DO NOTHING
   }
@@ -44,8 +45,8 @@ public:
   bool run(loco::Graph *graph);
 
 private:
-  loco::DataType _input_dtype;
-  loco::DataType _output_dtype;
+  loco::DataType _input_model_dtype;
+  loco::DataType _output_model_dtype;
   QuantizationGranularity _granularity;
 };
 
diff --git a/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h b/compiler/luci/pass/include/luci/Pass/SubstituteSplitVToSplitPass.h
new file mode 100644 (file)
index 0000000..8c89001
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
+#define __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
+
+#include <logo/Pass.h>
+
+namespace luci
+{
+
+/**
+ * @brief  Class to substitute certain SplitV to Split.
+ */
+struct SubstituteSplitVToSplitPass final : public logo::Pass
+{
+  const char *name(void) const final { return "luci::SubstituteSplitVToSplitPass"; }
+
+  bool run(loco::Graph *g) final;
+};
+
+} // namespace luci
+
+#endif // __LUCI_SUBSTITUTE_SPLIT_V_TO_SPLIT_PASS_H__
index 98c22a0..5d0c926 100644 (file)
 #include "luci/CircleOptimizer.h"
 
 #include "luci/Pass/ConvertNCHWToNHWCPass.h"
+#include "luci/Pass/ExpandBroadcastConstPass.h"
 #include "luci/Pass/FoldAddV2Pass.h"
 #include "luci/Pass/FoldCastPass.h"
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
 #include "luci/Pass/FoldDequantizePass.h"
 #include "luci/Pass/FoldSparseToDensePass.h"
 #include "luci/Pass/ForwardReshapeToUnaryOpPass.h"
+#include "luci/Pass/ForceQuantParamPass.h"
 #include "luci/Pass/FuseActivationFunctionPass.h"
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
 #include "luci/Pass/FuseAddWithTConvPass.h"
 #include "luci/Pass/FuseBatchNormWithConvPass.h"
 #include "luci/Pass/FuseBatchNormWithDwConvPass.h"
@@ -55,6 +59,7 @@
 #include "luci/Pass/ShuffleWeightTo16x1Float32Pass.h"
 #include "luci/Pass/SubstitutePackToReshapePass.h"
 #include "luci/Pass/SubstitutePadV2ToPadPass.h"
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
 #include "luci/Pass/SubstituteSqueezeToReshapePass.h"
 #include "luci/Pass/SubstituteStridedSliceToReshapePass.h"
 #include "luci/Pass/SubstituteTransposeToReshapePass.h"
@@ -86,17 +91,37 @@ namespace
 
 using namespace luci;
 
+template <typename T> T lexical_cast(const std::string &str)
+{
+  std::istringstream ss;
+  ss.str(str);
+  T data;
+  ss >> data;
+  return data;
+}
+
+template <typename T> std::vector<T> lexical_cast(std::vector<std::string> &sv)
+{
+  std::vector<T> result;
+  std::transform(sv.begin(), sv.end(), std::back_inserter(result),
+                 [](std::string str) -> T { return lexical_cast<T>(str); });
+  return result;
+}
+
 class OptimizeOptionsImpl final : public luci::CircleOptimizer::Options
 {
 public:
   void enable(Algorithm) final;
   void param(AlgorithmParameters, const std::string &) final;
   const std::string param(AlgorithmParameters) const final;
+  void params(AlgorithmParameters, std::vector<std::string> &) final;
+  std::vector<std::string> params(AlgorithmParameters) const final;
   bool query(Algorithm) final;
 
 private:
   std::vector<Algorithm> _algorithms;
   std::map<AlgorithmParameters, const std::string> _algorithm_params;
+  std::map<AlgorithmParameters, std::vector<std::string>> _multiple_params;
 };
 
 void OptimizeOptionsImpl::enable(Algorithm algo) { _algorithms.push_back(algo); }
@@ -119,6 +144,24 @@ const std::string OptimizeOptionsImpl::param(AlgorithmParameters param) const
   }
 }
 
+void OptimizeOptionsImpl::params(AlgorithmParameters param, std::vector<std::string> &vec)
+{
+  _multiple_params[param] = vec;
+}
+
+std::vector<std::string> OptimizeOptionsImpl::params(AlgorithmParameters param) const
+{
+  auto param_vec = _multiple_params.find(param);
+  if (param_vec != _multiple_params.end())
+  {
+    return param_vec->second;
+  }
+  else
+  {
+    return std::vector<std::string>();
+  }
+}
+
 bool OptimizeOptionsImpl::query(Algorithm algo)
 {
   std::vector<Algorithm>::iterator it = std::find(_algorithms.begin(), _algorithms.end(), algo);
@@ -237,6 +280,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<FuseBatchNormWithTConvPass>());
   }
+  if (_options->query(Options::Algorithm::FuseAddWithFullyConnected))
+  {
+    phase.emplace_back(std::make_unique<FuseAddWithFullyConnectedPass>());
+  }
   if (_options->query(Options::Algorithm::FuseAddWithTConv))
   {
     phase.emplace_back(std::make_unique<FuseAddWithTConvPass>());
@@ -257,6 +304,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::FoldCastPass>());
   }
+  if (_options->query(Options::Algorithm::FoldDepthwiseConv2D))
+  {
+    phase.emplace_back(std::make_unique<luci::FoldDepthwiseConv2DPass>());
+  }
   if (_options->query(Options::Algorithm::FoldDequantize))
   {
     phase.emplace_back(std::make_unique<luci::FoldDequantizePass>());
@@ -281,6 +332,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::ShuffleWeightTo16x1Float32Pass>());
   }
+  if (_options->query(Options::Algorithm::ExpandBroadcastConst))
+  {
+    phase.emplace_back(std::make_unique<luci::ExpandBroadcastConstPass>());
+  }
   if (_options->query(Options::Algorithm::RemoveFakeQuant))
   {
     phase.emplace_back(std::make_unique<luci::RemoveFakeQuantPass>());
@@ -329,6 +384,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const
   {
     phase.emplace_back(std::make_unique<luci::SubstitutePadV2ToPadPass>());
   }
+  if (_options->query(Options::Algorithm::SubstituteSplitVToSplit))
+  {
+    phase.emplace_back(std::make_unique<luci::SubstituteSplitVToSplitPass>());
+  }
   if (_options->query(Options::Algorithm::SubstituteSqueezeToReshape))
   {
     phase.emplace_back(std::make_unique<luci::SubstituteSqueezeToReshapePass>());
@@ -363,28 +422,30 @@ void CircleOptimizer::quantize(loco::Graph *g) const
   // Fake quantization of weights
   if (_options->query(Options::Algorithm::QuantizeDequantizeWeights))
   {
-    static const std::vector<std::string> fakeq_supported_input_dtype{"float32"};
-    static const std::vector<std::string> fakeq_supported_output_dtype{"uint8", "int16"};
+    static const std::vector<std::string> fakeq_supported_input_model_dtype{"float32"};
+    static const std::vector<std::string> fakeq_supported_output_model_dtype{"uint8", "int16"};
     static const std::vector<std::string> fakeq_supported_granularity{"layer", "channel"};
 
-    auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
-    auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+    auto input_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+    auto output_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
     auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
 
-    if (!in_array(to_lower_case(input_dtype), fakeq_supported_input_dtype))
+    if (!in_array(to_lower_case(input_model_dtype), fakeq_supported_input_model_dtype))
       throw std::runtime_error("Unsupported input type. List of supported input type: " +
-                               to_string(fakeq_supported_input_dtype));
+                               to_string(fakeq_supported_input_model_dtype));
 
-    if (!in_array(to_lower_case(output_dtype), fakeq_supported_output_dtype))
+    if (!in_array(to_lower_case(output_model_dtype), fakeq_supported_output_model_dtype))
       throw std::runtime_error("Unsupported output type. List of supported output type: " +
-                               to_string(fakeq_supported_output_dtype));
+                               to_string(fakeq_supported_output_model_dtype));
 
     if (!in_array(to_lower_case(granularity), fakeq_supported_granularity))
       throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                                to_string(fakeq_supported_granularity));
 
     if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
-        str_to_dtype(output_dtype) != loco::DataType::U8)
+        str_to_dtype(output_model_dtype) != loco::DataType::U8)
       throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
 
     // Clear existing quantparams before doing fake quantization
@@ -395,39 +456,43 @@ void CircleOptimizer::quantize(loco::Graph *g) const
         circle_node->quantparam(nullptr);
     }
 
-    luci::QuantizeDequantizeWeightsPass fake_quantizer(
-      str_to_dtype(input_dtype), str_to_dtype(output_dtype), str_to_granularity(granularity));
+    luci::QuantizeDequantizeWeightsPass fake_quantizer(str_to_dtype(input_model_dtype),
+                                                       str_to_dtype(output_model_dtype),
+                                                       str_to_granularity(granularity));
     fake_quantizer.run(g);
   }
 
   // Actual quantization of weights, bias, and activation
   if (_options->query(Options::Algorithm::QuantizeWithMinMax))
   {
-    static const std::vector<std::string> qwmm_supported_input_dtype{"float32"};
-    static const std::vector<std::string> qwmm_supported_output_dtype{"uint8", "int16"};
+    static const std::vector<std::string> qwmm_supported_input_model_dtype{"float32"};
+    static const std::vector<std::string> qwmm_supported_output_model_dtype{"uint8", "int16"};
     static const std::vector<std::string> qwmm_supported_granularity{"layer", "channel"};
 
-    auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
-    auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+    auto input_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+    auto output_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
     auto granularity = _options->param(Options::AlgorithmParameters::Quantize_granularity);
 
-    if (!in_array(to_lower_case(input_dtype), qwmm_supported_input_dtype))
+    if (!in_array(to_lower_case(input_model_dtype), qwmm_supported_input_model_dtype))
       throw std::runtime_error("Unsupported input type. List of supported input types: " +
-                               to_string(qwmm_supported_input_dtype));
+                               to_string(qwmm_supported_input_model_dtype));
 
-    if (!in_array(to_lower_case(output_dtype), qwmm_supported_output_dtype))
+    if (!in_array(to_lower_case(output_model_dtype), qwmm_supported_output_model_dtype))
       throw std::runtime_error("Unsupported output type. List of supported output types: " +
-                               to_string(qwmm_supported_output_dtype));
+                               to_string(qwmm_supported_output_model_dtype));
 
     if (!in_array(to_lower_case(granularity), qwmm_supported_granularity))
       throw std::runtime_error("Unsupported granularity. List of supported granularity: " +
                                to_string(qwmm_supported_granularity));
 
     if (str_to_granularity(granularity) == QuantizationGranularity::LayerWise &&
-        str_to_dtype(output_dtype) != loco::DataType::U8)
+        str_to_dtype(output_model_dtype) != loco::DataType::U8)
       throw std::runtime_error("Layer-wise quantization only supports uint8 dtype.");
 
-    luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype),
+    luci::QuantizeWithMinMaxPass quantizer(str_to_dtype(input_model_dtype),
+                                           str_to_dtype(output_model_dtype),
                                            str_to_granularity(granularity));
     quantizer.run(g);
 
@@ -446,7 +511,7 @@ void CircleOptimizer::quantize(loco::Graph *g) const
     phase_runner.run(phase);
 
     // Verify the type/granularity of the quantized model
-    luci::QuantizedModelVerifier verifier(str_to_dtype(output_dtype),
+    luci::QuantizedModelVerifier verifier(str_to_dtype(output_model_dtype),
                                           str_to_granularity(granularity));
     verifier.verify(g);
   }
@@ -454,24 +519,44 @@ void CircleOptimizer::quantize(loco::Graph *g) const
   // Requantize
   if (_options->query(Options::Algorithm::Requantize))
   {
-    static const std::vector<std::string> rq_supported_input_dtype{"int8"};
-    static const std::vector<std::string> rq_supported_output_dtype{"uint8"};
+    static const std::vector<std::string> rq_supported_input_model_dtype{"int8"};
+    static const std::vector<std::string> rq_supported_output_model_dtype{"uint8"};
 
-    auto input_dtype = _options->param(Options::AlgorithmParameters::Quantize_input_dtype);
-    auto output_dtype = _options->param(Options::AlgorithmParameters::Quantize_output_dtype);
+    auto input_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_input_model_dtype);
+    auto output_model_dtype =
+      _options->param(Options::AlgorithmParameters::Quantize_output_model_dtype);
 
-    if (!in_array(to_lower_case(input_dtype), rq_supported_input_dtype))
+    if (!in_array(to_lower_case(input_model_dtype), rq_supported_input_model_dtype))
       throw std::runtime_error("Unsupported input type. List of supported input types: " +
-                               to_string(rq_supported_input_dtype));
+                               to_string(rq_supported_input_model_dtype));
 
-    if (!in_array(to_lower_case(output_dtype), rq_supported_output_dtype))
+    if (!in_array(to_lower_case(output_model_dtype), rq_supported_output_model_dtype))
       throw std::runtime_error("Unsupported output type. List of supported output types: " +
-                               to_string(rq_supported_output_dtype));
+                               to_string(rq_supported_output_model_dtype));
 
-    luci::RequantizePass requantizer(str_to_dtype(input_dtype), str_to_dtype(output_dtype));
+    luci::RequantizePass requantizer(str_to_dtype(input_model_dtype),
+                                     str_to_dtype(output_model_dtype));
     requantizer.run(g);
   }
 
+  // Force to write quantparam to specified tensors
+  // NOTE Only per-tensor (not per-channel) qparam can be written
+  if (_options->query(Options::Algorithm::ForceQuantParam))
+  {
+    ForceQuantParamPass::TensorVector tensors =
+      _options->params(Options::AlgorithmParameters::Quantize_tensor_names);
+    auto str_scales = _options->params(Options::AlgorithmParameters::Quantize_scales);
+    auto str_zero_points = _options->params(Options::AlgorithmParameters::Quantize_zero_points);
+
+    // Cast scales/zero_points to proper types
+    ForceQuantParamPass::ScaleVector scales = lexical_cast<float>(str_scales);
+    ForceQuantParamPass::ZPVector zero_points = lexical_cast<int64_t>(str_zero_points);
+
+    ForceQuantParamPass fq(tensors, scales, zero_points);
+    fq.run(g);
+  }
+
   logo::Phase phase;
 
   // Do Shape/Type inference
index 43d96fe..a1b5c7f 100644 (file)
@@ -33,6 +33,7 @@ TEST(CircleOptimizerTest, optimize_algorithms)
   // TODO add more if needed
   options->enable(Algorithms::FoldAddV2);
   options->enable(Algorithms::FoldCast);
+  options->enable(Algorithms::FoldDepthwiseConv2D);
   options->enable(Algorithms::FoldDequantize);
   options->enable(Algorithms::FoldSparseToDense);
   options->enable(Algorithms::FusePreActivationBatchNorm);
@@ -45,6 +46,7 @@ TEST(CircleOptimizerTest, optimize_algorithms)
   options->enable(Algorithms::SubstituteStridedSliceToReshape);
   options->enable(Algorithms::SubstituteTransposeToReshape);
   options->enable(Algorithms::ConvertNCHWToNHWC);
+  options->enable(Algorithms::ExpandBroadcastConst);
 
   o.optimize(&g);
 
@@ -78,8 +80,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_simple)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
   options->param(AlgorithmParameters::Quantize_granularity, "layer");
 
   o.quantize(&g);
@@ -95,8 +97,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_input_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
   options->param(AlgorithmParameters::Quantize_granularity, "layer");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -110,8 +112,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_output_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
   options->param(AlgorithmParameters::Quantize_granularity, "layer");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -125,8 +127,8 @@ TEST(CircleOptimizerTest, quantize_quantdequant_gran_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeDequantizeWeights);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
   options->param(AlgorithmParameters::Quantize_granularity, "invalid");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -140,8 +142,8 @@ TEST(CircleOptimizerTest, quantize_minmax_simple)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
   options->param(AlgorithmParameters::Quantize_granularity, "layer");
 
   o.quantize(&g);
@@ -157,8 +159,8 @@ TEST(CircleOptimizerTest, quantize_minmax_input_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
   options->param(AlgorithmParameters::Quantize_granularity, "layer");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -172,8 +174,8 @@ TEST(CircleOptimizerTest, quantize_minmax_output_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
   options->param(AlgorithmParameters::Quantize_granularity, "layer");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -187,8 +189,8 @@ TEST(CircleOptimizerTest, quantize_minmax_gran_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::QuantizeWithMinMax);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "float32");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "float32");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
   options->param(AlgorithmParameters::Quantize_granularity, "invalid");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
@@ -202,8 +204,8 @@ TEST(CircleOptimizerTest, quantize_requant_simple)
   auto options = o.options();
 
   options->enable(Algorithms::Requantize);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
 
   o.quantize(&g);
 
@@ -218,8 +220,8 @@ TEST(CircleOptimizerTest, quantize_requant_input_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::Requantize);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "invalid");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "uint8");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "invalid");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "uint8");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
 }
@@ -232,8 +234,8 @@ TEST(CircleOptimizerTest, quantize_requant_output_NEG)
   auto options = o.options();
 
   options->enable(Algorithms::Requantize);
-  options->param(AlgorithmParameters::Quantize_input_dtype, "int8");
-  options->param(AlgorithmParameters::Quantize_output_dtype, "invalid");
+  options->param(AlgorithmParameters::Quantize_input_model_dtype, "int8");
+  options->param(AlgorithmParameters::Quantize_output_model_dtype, "invalid");
 
   EXPECT_THROW(o.quantize(&g), std::runtime_error);
 }
index 95e23e1..2707140 100644 (file)
 namespace
 {
 
+bool is_same_shape(const luci::CircleNode *node, const std::vector<loco::Dimension> &shape)
+{
+  if (not node)
+    return false;
+
+  if (shape.size() != node->rank())
+    return false;
+
+  for (uint32_t i = 0; i < shape.size(); i++)
+  {
+    if (not(node->dim(i) == shape[i]))
+      return false;
+  }
+  return true;
+}
+
 enum class DataFormat
 {
   NCHW,
@@ -465,7 +481,7 @@ bool is_NCHW_with_s_const(const T *node, luci::CircleNode *&pred_node,
 //
 // Find MUL with an NCHW pattern described below
 //   - Input (non-constant) shape : [N, C, H, W]
-//   - Input (constant) shape : [1, C, 1, 1] or a scalar (1)
+//   - Input (constant) shape : [1, C, 1, 1], [N, C, H, W] or a scalar (1)
 //   - Output shape : [N, C, H, W]
 bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_node,
                         luci::CircleConst *&multiplier)
@@ -497,26 +513,22 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod
   if (const_rank != 4 && const_rank != 0 && const_rank != 1)
     return false;
 
-  if (const_rank == 4)
-  {
-    for (uint32_t i = 0; i < const_rank; i++)
-    {
-      if (i != 1 && multiplier->dim(i).value() != 1)
-        return false;
-    }
-  }
-
   const auto input_cdim = pred_node->dim(1);
   const auto output_cdim = node->dim(1);
 
   if (const_rank == 4)
   {
-    const auto const_cdim = multiplier->dim(1);
-    // Check Input, Output, Const have the same channel size
-    if (const_cdim == input_cdim && input_cdim == output_cdim)
-      return true;
-    else
-      return false;
+    bool supported_shape = false;
+
+    // Check multiplier is (1, C, 1, 1)
+    if (is_same_shape(multiplier, {1, node->dim(1), 1, 1}))
+      supported_shape = true;
+
+    // Check multiplier is (N, C, H, W)
+    if (is_same_shape(multiplier, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+      supported_shape = true;
+
+    return supported_shape;
   }
   if (input_cdim == output_cdim)
     return true;
@@ -527,7 +539,7 @@ bool is_NCHW_with_const(const luci::CircleMul *node, luci::CircleNode *&pred_nod
 // We assume ADD with const input is NCHW if,
 // Input shape: (N, C, H, W)
 // Output shape: (N, C, H, W)
-// 1. Const shape is (1, C, 1, 1) or a scalar (1)
+// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1)
 // 2. Input, Output, Const have the same C.
 bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_node,
                         luci::CircleConst *&beta)
@@ -559,30 +571,22 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod
   if (const_rank != 4 && const_rank != 0 && const_rank != 1)
     return false;
 
-  if (const_rank == 4)
-  {
-    // Check the shape is (1, C, 1, 1)
-    for (uint32_t i = 0; i < const_rank; i++)
-    {
-      if (i == 1)
-        continue;
-
-      if (beta->dim(i).value() != 1)
-        return false;
-    }
-  }
-
   const auto input_cdim = pred_node->dim(1);
   const auto output_cdim = node->dim(1);
 
   if (const_rank == 4)
   {
-    const auto const_cdim = beta->dim(1);
-    // Check Input, Output, Const have the same channel size
-    if (const_cdim == input_cdim && input_cdim == output_cdim)
-      return true;
-    else
-      return false;
+    bool supported_shape = false;
+
+    // Check beta is (1, C, 1, 1)
+    if (is_same_shape(beta, {1, node->dim(1), 1, 1}))
+      supported_shape = true;
+
+    // Check beta is (N, C, H, W)
+    if (is_same_shape(beta, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+      supported_shape = true;
+
+    return supported_shape;
   }
   if (input_cdim == output_cdim)
     return true;
@@ -593,7 +597,7 @@ bool is_NCHW_with_const(const luci::CircleAdd *node, luci::CircleNode *&pred_nod
 // We assume SUB with const input is NCHW if,
 // Input shape: (N, C, H, W)
 // Output shape: (N, C, H, W)
-// 1. Const shape is (1, C, 1, 1) or a scalar (1)
+// 1. Const shape is (1, C, 1, 1), (N, C, H, W) or a scalar (1)
 // 2. Input, Output, Const have the same C.
 bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pred_node,
                         const luci::CircleConst *subtract)
@@ -609,30 +613,22 @@ bool is_NCHW_with_const(const luci::CircleSub *node, const luci::CircleNode *pre
   if (const_rank != 4 && const_rank != 0 && const_rank != 1)
     return false;
 
-  if (const_rank == 4)
-  {
-    // Check the shape is (1, C, 1, 1)
-    for (uint32_t i = 0; i < const_rank; i++)
-    {
-      if (i == 1)
-        continue;
-
-      if (subtract->dim(i).value() != 1)
-        return false;
-    }
-  }
-
   const auto input_cdim = pred_node->dim(1);
   const auto output_cdim = node->dim(1);
 
   if (const_rank == 4)
   {
-    const auto const_cdim = subtract->dim(1);
-    // Check Input, Output, Const have the same channel size
-    if (const_cdim == input_cdim && input_cdim == output_cdim)
-      return true;
-    else
-      return false;
+    bool supported_shape = false;
+
+    // Check subtract is (1, C, 1, 1)
+    if (is_same_shape(subtract, {1, node->dim(1), 1, 1}))
+      supported_shape = true;
+
+    // Check subtract is (N, C, H, W)
+    if (is_same_shape(subtract, {node->dim(0), node->dim(1), node->dim(2), node->dim(3)}))
+      supported_shape = true;
+
+    return supported_shape;
   }
   if (input_cdim == output_cdim)
     return true;
index d844246..c9412fb 100644 (file)
@@ -130,6 +130,19 @@ protected:
   }
 
 public:
+  void update_const_shape_to_nchw(void)
+  {
+    uint32_t channel_size = 16;
+    beta->shape({1, channel_size, 4, 4});
+
+    beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+    for (uint32_t i = 0; i < channel_size; i++)
+    {
+      beta->at<loco::DataType::FLOAT32>(i) = i;
+    }
+  }
+
+public:
   luci::CircleAdd *add = nullptr;
   luci::CircleConst *beta = nullptr;
 };
@@ -421,6 +434,19 @@ protected:
   }
 
 public:
+  void update_const_shape_to_nchw(void)
+  {
+    uint32_t channel_size = 16;
+    multiplier->shape({1, channel_size, 4, 4});
+
+    multiplier->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+    for (uint32_t i = 0; i < channel_size; i++)
+    {
+      multiplier->at<loco::DataType::FLOAT32>(i) = i;
+    }
+  }
+
+public:
   luci::CircleMul *mul = nullptr;
   luci::CircleConst *multiplier = nullptr;
 };
@@ -696,6 +722,19 @@ protected:
   }
 
 public:
+  void update_const_shape_to_nchw(void)
+  {
+    uint32_t channel_size = 16;
+    beta->shape({1, channel_size, 4, 4});
+
+    beta->size<loco::DataType::FLOAT32>(channel_size * 4 * 4);
+    for (uint32_t i = 0; i < channel_size; i++)
+    {
+      beta->at<loco::DataType::FLOAT32>(i) = i;
+    }
+  }
+
+public:
   luci::CircleSub *sub = nullptr;
   luci::CircleConst *beta = nullptr;
 };
@@ -815,6 +854,30 @@ TEST(ConvertNCHWToNHWC, Add)
   check_pre_trans(g.output->from());
 }
 
+TEST(ConvertNCHWToNHWC, Add_NCHW_const)
+{
+  AddGraph g;
+  g.init();
+  g.update_const_shape_to_nchw();
+
+  run_phase(&g.g, false, false);
+
+  check_pre_trans(g.add->x());
+
+  auto add_succs = loco::succs(g.add);
+  EXPECT_EQ(1, add_succs.size());
+  check_post_trans(*add_succs.begin());
+
+  uint32_t channel_size = 16;
+  auto new_beta = dynamic_cast<luci::CircleConst *>(g.add->y());
+  EXPECT_NE(nullptr, new_beta);
+  EXPECT_EQ(4, new_beta->rank());
+  EXPECT_EQ(1, new_beta->dim(0).value());
+  EXPECT_EQ(4, new_beta->dim(1).value());
+  EXPECT_EQ(4, new_beta->dim(2).value());
+  EXPECT_EQ(channel_size, new_beta->dim(3).value());
+}
+
 TEST(ConvertNCHWToNHWC, NHWC_Relu)
 {
   // Relu is already NHWC, so it should not be converted
@@ -1123,6 +1186,30 @@ TEST(ConvertNCHWToNHWC, Mul)
   check_pre_trans(g.output->from());
 }
 
+TEST(ConvertNCHWToNHWC, Mul_NCHW_const)
+{
+  MulGraph g;
+  g.init();
+  g.update_const_shape_to_nchw();
+
+  run_phase(&g.g, false, false);
+
+  check_pre_trans(g.mul->x());
+
+  auto mul_succs = loco::succs(g.mul);
+  EXPECT_EQ(1, mul_succs.size());
+  check_post_trans(*mul_succs.begin());
+
+  uint32_t channel_size = 16;
+  auto new_multiplier = dynamic_cast<luci::CircleConst *>(g.mul->y());
+  EXPECT_NE(nullptr, new_multiplier);
+  EXPECT_EQ(4, new_multiplier->rank());
+  EXPECT_EQ(1, new_multiplier->dim(0).value());
+  EXPECT_EQ(4, new_multiplier->dim(1).value());
+  EXPECT_EQ(4, new_multiplier->dim(2).value());
+  EXPECT_EQ(channel_size, new_multiplier->dim(3).value());
+}
+
 TEST(ConvertNCHWToNHWC, MulScalar)
 {
   MulScalarGraph g;
@@ -1432,6 +1519,30 @@ TEST(ConvertNCHWToNHWC, Sub)
   check_pre_trans(g.output->from());
 }
 
+TEST(ConvertNCHWToNHWC, Sub_NCHW_const)
+{
+  SubGraph g;
+  g.init();
+  g.update_const_shape_to_nchw();
+
+  run_phase(&g.g, false, false);
+
+  check_pre_trans(g.sub->x());
+
+  auto sub_succs = loco::succs(g.sub);
+  EXPECT_EQ(1, sub_succs.size());
+  check_post_trans(*sub_succs.begin());
+
+  uint32_t channel_size = 16;
+  auto new_beta = dynamic_cast<luci::CircleConst *>(g.sub->y());
+  EXPECT_NE(nullptr, new_beta);
+  EXPECT_EQ(4, new_beta->rank());
+  EXPECT_EQ(1, new_beta->dim(0).value());
+  EXPECT_EQ(4, new_beta->dim(1).value());
+  EXPECT_EQ(4, new_beta->dim(2).value());
+  EXPECT_EQ(channel_size, new_beta->dim(3).value());
+}
+
 TEST(ConvertNCHWToNHWC, SubScalar)
 {
   SubScalarGraph g;
diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.cpp
new file mode 100644 (file)
index 0000000..25fb9f1
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+#include <type_traits>
+
+namespace
+{
+
+luci::CircleConst *create_expanded_constant(luci::CircleConst *node, luci::CircleNode *successor)
+{
+  LOGGER(l);
+
+  if (successor->rank() != node->rank())
+    return nullptr;
+
+  std::vector<uint32_t> broadcast_dims;
+  for (uint32_t dim = 0; dim < node->rank(); ++dim)
+  {
+    if (node->dim(dim) == successor->dim(dim))
+      continue;
+
+    if (node->dim(dim) == 1)
+      broadcast_dims.push_back(dim);
+  }
+
+  if (broadcast_dims.size() != 1 || broadcast_dims.back() != node->rank() - 1)
+  {
+    WARN(l) << "NYI: Only depth broadcast removal is supported";
+    return nullptr;
+  }
+
+  auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+  constant->name(node->name());
+  constant->dtype(node->dtype());
+  constant->rank(node->rank());
+  constant->shape_status(luci::ShapeStatus::VALID);
+
+  uint32_t node_size = node->size<loco::DataType::FLOAT32>();
+  uint32_t constant_size = 1;
+  for (uint32_t i = 0; i < successor->rank(); ++i)
+  {
+    constant->dim(i).set(successor->dim(i).value());
+    constant_size *= constant->dim(i).value();
+  }
+  constant->size<loco::DataType::FLOAT32>(constant_size);
+
+  auto const node_data = &node->at<loco::DataType::FLOAT32>(0);
+  auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0);
+
+  auto const successor_depth = successor->dim(successor->rank() - 1).value();
+  for (uint32_t d = 0; d < successor_depth; ++d)
+    std::copy(node_data, node_data + node_size, constant_data + d * node_size);
+
+  return constant;
+}
+
+template <typename N> bool expand_node_input(luci::CircleConst *node, luci::CircleNode *successor)
+{
+  static_assert(std::is_base_of<luci::CircleNode, N>::value,
+                "Successor node should have CircleNode base");
+
+  auto const successor_node = loco::must_cast<N *>(successor);
+  auto const successor_x = loco::must_cast<luci::CircleNode *>(successor_node->x());
+  auto const successor_y = loco::must_cast<luci::CircleNode *>(successor_node->y());
+
+  luci::CircleConst *expanded_const;
+
+  if (node == successor_x)
+  {
+    expanded_const = create_expanded_constant(node, successor_y);
+
+    if (expanded_const == nullptr)
+      return false;
+
+    successor_node->x(expanded_const);
+  }
+  else if (node == successor_y)
+  {
+    expanded_const = create_expanded_constant(node, successor_x);
+
+    if (expanded_const == nullptr)
+      return false;
+
+    successor_node->y(expanded_const);
+  }
+
+  return true;
+}
+
+/**
+ * Expand constants following broadcasting rules for binary input nodes (Add, Mul, etc.)
+ *
+ *    BEFORE
+ *
+ *    [CircleInput] [CircleConst (H x W x 1)]
+ *               |     |
+ *             [CircleAdd]
+ *
+ *    AFTER
+ *
+ *    [CircleInput] [CircleConst (H x W x D)]
+ *               |     |
+ *             [CircleAdd]
+ */
+bool expand_broadcast_const(luci::CircleConst *node)
+{
+  if (node->dtype() != loco::DataType::FLOAT32)
+    return false; // Unsupported data type
+
+  bool changed = false;
+
+  for (auto successor : loco::succs(node))
+  {
+    auto const circle_successor = loco::must_cast<luci::CircleNode *>(successor);
+    switch (circle_successor->opcode())
+    {
+      case luci::CircleOpcode::ADD:
+        if (expand_node_input<luci::CircleAdd>(node, circle_successor))
+          changed = true;
+        break;
+      case luci::CircleOpcode::MUL:
+        if (expand_node_input<luci::CircleMul>(node, circle_successor))
+          changed = true;
+        break;
+      case luci::CircleOpcode::DIV:
+        if (expand_node_input<luci::CircleDiv>(node, circle_successor))
+          changed = true;
+        break;
+      default:
+        break; // Unsupported successor node
+    }
+  }
+
+  return changed;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Broadcast expanding for Const nodes
+ **/
+bool ExpandBroadcastConstPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto const_node = dynamic_cast<luci::CircleConst *>(node);
+    if (const_node == nullptr)
+      continue;
+
+    if (expand_broadcast_const(const_node))
+      changed = true;
+  }
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp b/compiler/luci/pass/src/ExpandBroadcastConstPass.test.cpp
new file mode 100644 (file)
index 0000000..0734e07
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ExpandBroadcastConstPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+class ExpandBroadcastConstTest : public ::testing::Test
+{
+public:
+  ExpandBroadcastConstTest()
+  {
+    _x = _g.nodes()->create<luci::CircleInput>();
+    _y = _g.nodes()->create<luci::CircleConst>();
+    _add = _g.nodes()->create<luci::CircleAdd>();
+    _output = _g.nodes()->create<luci::CircleOutput>();
+
+    auto graph_input = _g.inputs()->create();
+    graph_input->dtype(loco::DataType::FLOAT32);
+    graph_input->shape({1, H, W, D});
+    _x->index(graph_input->index());
+    _x->dtype(graph_input->dtype());
+    _x->shape({1, H, W, D});
+
+    auto graph_output = _g.outputs()->create();
+    graph_output->dtype(loco::DataType::FLOAT32);
+    graph_output->shape({1, H, W, D});
+    _output->index(graph_output->index());
+    _output->dtype(graph_output->dtype());
+    _output->shape({1, H, W, D});
+
+    _y->dtype(loco::DataType::FLOAT32);
+    _y->shape({1, H, W, 1});
+    _y->size<loco::DataType::FLOAT32>(16);
+
+    _add->dtype(loco::DataType::FLOAT32);
+    _add->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _add->x(_x);
+    _add->y(_y);
+    _add->shape({1, H, W, D});
+
+    _output->from(_add);
+
+    _x->name("input");
+    _output->name("output");
+  }
+
+protected:
+  uint32_t const H = 4;
+  uint32_t const W = 4;
+  uint32_t const D = 3;
+
+protected:
+  loco::Graph _g;
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleInput *_x = nullptr;
+  luci::CircleConst *_y = nullptr;
+  luci::CircleOutput *_output = nullptr;
+};
+
+} // namespace
+
+TEST_F(ExpandBroadcastConstTest, name)
+{
+  luci::ExpandBroadcastConstPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(ExpandBroadcastConstTest, remove_broadcast)
+{
+  for (uint32_t i = 0; i < H * W; ++i)
+    _y->at<loco::DataType::FLOAT32>(i) = static_cast<float>(i);
+
+  luci::ExpandBroadcastConstPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y());
+  ASSERT_NE(broadcasted_const, nullptr);
+
+  EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32);
+  EXPECT_EQ(broadcasted_const->dim(1).value(), H);
+  EXPECT_EQ(broadcasted_const->dim(2).value(), W);
+  EXPECT_EQ(broadcasted_const->dim(3).value(), D);
+  EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D);
+
+  for (uint32_t i = 0; i < H * W; ++i)
+  {
+    for (uint32_t d = 0; d < D; ++d)
+    {
+      EXPECT_NEAR(broadcasted_const->at<loco::DataType::FLOAT32>(i + H * W * d),
+                  static_cast<float>(i), std::numeric_limits<float>::min());
+    }
+  }
+}
+
+TEST_F(ExpandBroadcastConstTest, remove_broadcast_multiple_successors)
+{
+  auto const circle_sqrt = _g.nodes()->create<luci::CircleSqrt>();
+  circle_sqrt->dtype(loco::DataType::FLOAT32);
+  circle_sqrt->shape({1, H, W, 1});
+  circle_sqrt->x(_y);
+
+  luci::ExpandBroadcastConstPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto broadcasted_const = dynamic_cast<luci::CircleConst *>(_add->y());
+  auto original_const = dynamic_cast<luci::CircleConst *>(circle_sqrt->x());
+
+  ASSERT_NE(broadcasted_const, nullptr);
+  EXPECT_EQ(broadcasted_const->dtype(), loco::DataType::FLOAT32);
+  EXPECT_EQ(broadcasted_const->dim(3).value(), D);
+  EXPECT_EQ(broadcasted_const->size<loco::DataType::FLOAT32>(), H * W * D);
+
+  // Check if another successor's node was left intact
+  ASSERT_NE(original_const, nullptr);
+  EXPECT_EQ(original_const->dtype(), loco::DataType::FLOAT32);
+  EXPECT_EQ(original_const->dim(3).value(), 1);
+  EXPECT_EQ(original_const->size<loco::DataType::FLOAT32>(), H * W * 1);
+}
+
+TEST_F(ExpandBroadcastConstTest, broadcast_impossible_NEG)
+{
+  _y->shape({1, H, W, 2});
+  _y->size<loco::DataType::FLOAT32>(H * W * (D - 1));
+
+  luci::ExpandBroadcastConstPass pass;
+  ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.cpp
new file mode 100644 (file)
index 0000000..6e423e3
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/IR/AttrFusedActFunc.h>
+
+#include <luci/Log.h>
+
+namespace
+{
+
+// TODO Share activation min/max and compute_input/output code with luci-interpreter
+
+bool compute_output(uint32_t *output_size, luci::Padding padding, int32_t image_size,
+                    int32_t filter_size, int32_t stride, int32_t dilation_rate)
+{
+  auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  switch (padding)
+  {
+    case luci::Padding::SAME:
+      *output_size = (image_size + stride - 1) / stride;
+      return true;
+
+    case luci::Padding::VALID:
+      *output_size = (image_size + stride - effective_filter_size) / stride;
+      return true;
+
+    default:
+    {
+      LOGGER(l);
+      WARN(l) << "Unsupported padding: " << uint32_t(padding);
+      return false;
+    }
+  }
+}
+
+uint32_t compute_padding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+                         int32_t filter_size, int32_t out_size)
+{
+  auto const effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+  auto const padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+  return padding > 0 ? padding : 0;
+}
+
+bool set_kernel_parameters(tflite::DepthwiseParams *params, luci::CircleDepthwiseConv2D *node,
+                           uint32_t padding_height, uint32_t padding_width)
+{
+  switch (node->fusedActivationFunction())
+  {
+    case luci::FusedActFunc::NONE:
+    case luci::FusedActFunc::TANH:
+      params->float_activation_min = std::numeric_limits<float>::lowest();
+      params->float_activation_max = std::numeric_limits<float>::max();
+      break;
+    case luci::FusedActFunc::RELU:
+      params->float_activation_min = 0;
+      params->float_activation_max = std::numeric_limits<float>::max();
+      break;
+    case luci::FusedActFunc::RELU_N1_TO_1:
+      params->float_activation_min = -1;
+      params->float_activation_max = 1;
+      break;
+    case luci::FusedActFunc::RELU6:
+      params->float_activation_min = 0;
+      params->float_activation_max = 6;
+      break;
+    default:
+    {
+      LOGGER(l);
+      WARN(l) << "Unsupported activation: " << uint32_t(node->fusedActivationFunction());
+      return false;
+    }
+  }
+
+  params->stride_height = node->stride()->h();
+  params->stride_width = node->stride()->w();
+  params->dilation_height_factor = node->dilation()->h();
+  params->dilation_width_factor = node->dilation()->w();
+  params->depth_multiplier = node->depthMultiplier();
+
+  params->padding_values.height = padding_height;
+  params->padding_values.width = padding_width;
+
+  return true;
+}
+
+/**
+ * Fold DepthwiseConv2D with constant input and filter into a constant tensor
+ *
+ *    BEFORE
+ *
+ *    [CircleConst] [CircleConst]
+ *               |   |
+ *       [CircleDepthwiseConv2D]
+ *
+ *    AFTER
+ *
+ *           [CircleConst]
+ */
+bool fold_depthwise_conv_2d(luci::CircleDepthwiseConv2D *node)
+{
+  LOGGER(l);
+
+  auto const input = dynamic_cast<luci::CircleConst *>(node->input());
+
+  if (input == nullptr)
+    return false; // Constant input is required for folding
+
+  auto const filter = dynamic_cast<luci::CircleConst *>(node->filter());
+
+  if (filter == nullptr)
+    return false; // Constant filter is required for folding
+
+  if (filter->dim(0).value() != 1)
+    return false; // Unsupported batch size
+
+  auto const bias = dynamic_cast<luci::CircleConst *>(node->bias());
+
+  if (bias == nullptr)
+    return false; // Constant bias is required for folding
+
+  auto const input_batches = input->dim(0).value();
+  auto const input_height = input->dim(1).value();
+  auto const input_width = input->dim(2).value();
+  auto const input_depth = input->dim(3).value();
+
+  auto const filter_height = filter->dim(1).value();
+  auto const filter_width = filter->dim(2).value();
+  auto const filter_channels_out = filter->dim(3).value();
+
+  if (filter_channels_out % input_depth != 0)
+    return false; // Wrong input/output depth ratio
+
+  if (node->depthMultiplier() != static_cast<int32_t>(filter_channels_out / input_depth))
+    return false; // Wrong depth multiplier value
+
+  if (bias->rank() != 1 || bias->dim(0).value() != filter_channels_out)
+    return false; // Unsupported bias value
+
+  uint32_t output_height = 0;
+  uint32_t output_width = 0;
+
+  if (!compute_output(&output_height, node->padding(), input_height, filter_height,
+                      node->stride()->h(), node->dilation()->h()))
+    return false; // Unsupported output parameters
+
+  if (!compute_output(&output_width, node->padding(), input_width, filter_width,
+                      node->stride()->w(), node->dilation()->w()))
+    return false; // Unsupported output parameters
+
+  auto const padding_height = compute_padding(node->stride()->h(), node->dilation()->h(),
+                                              input_height, filter_height, output_height);
+  auto const padding_width = compute_padding(node->stride()->w(), node->dilation()->w(),
+                                             input_width, filter_width, output_width);
+
+  tflite::DepthwiseParams params{};
+
+  if (!set_kernel_parameters(&params, node, padding_height, padding_width))
+    return false; // Unsupported kernel parameter values
+
+  auto constant = node->graph()->nodes()->create<luci::CircleConst>();
+  constant->name(node->name());
+  constant->dtype(node->dtype());
+  constant->rank(node->rank());
+  constant->shape_status(luci::ShapeStatus::VALID);
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    constant->dim(i).set(node->dim(i).value());
+
+  constant->size<loco::DataType::FLOAT32>(input_batches * output_height * output_width *
+                                          filter_channels_out);
+
+  auto const input_data = &input->at<loco::DataType::FLOAT32>(0);
+  auto const filter_data = &filter->at<loco::DataType::FLOAT32>(0);
+  auto const bias_data = &bias->at<loco::DataType::FLOAT32>(0);
+  auto const constant_data = &constant->at<loco::DataType::FLOAT32>(0);
+
+  auto tensor_shape = [](luci::CircleNode *node) {
+    tflite::RuntimeShape runtime_shape(node->rank());
+    for (uint32_t i = 0; i < node->rank(); ++i)
+      runtime_shape.SetDim(i, node->dim(i).value());
+    return runtime_shape;
+  };
+
+  tflite::reference_ops::DepthwiseConv(params, tensor_shape(input), input_data,
+                                       tensor_shape(filter), filter_data, tensor_shape(bias),
+                                       bias_data, tensor_shape(constant), constant_data);
+
+  loco::replace(node).with(constant);
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ * Constant Folding for DepthwiseConv2D Op
+ **/
+bool FoldDepthwiseConv2DPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto depthwise_conv2d = dynamic_cast<CircleDepthwiseConv2D *>(node);
+
+    if (depthwise_conv2d == nullptr)
+      continue;
+
+    switch (depthwise_conv2d->dtype())
+    {
+      case loco::DataType::FLOAT32:
+        changed = fold_depthwise_conv_2d(depthwise_conv2d);
+        break;
+      default:
+        break;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp b/compiler/luci/pass/src/FoldDepthwiseConv2DPass.test.cpp
new file mode 100644 (file)
index 0000000..b1ef568
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FoldDepthwiseConv2DPass.h"
+#include "PassTestGraphs.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+/**
+ *  Graph has a DepthwiseConv2D Op with constant inputs
+ *
+ *    BEFORE
+ *
+ *    [CircleConst] [CircleConst]
+ *               |   |
+ *       [CircleDepthwiseConv2D]
+ *
+ *    AFTER
+ *
+ *           [CircleConst]
+ */
+class FoldDepthwiseConv2DTest : public luci::ConstantFoldingTestGraph, public ::testing::Test
+{
+public:
+  FoldDepthwiseConv2DTest() : luci::ConstantFoldingTestGraph({1, 4, 4, 1}, loco::DataType::FLOAT32)
+  {
+    _dconv = _g.nodes()->create<luci::CircleDepthwiseConv2D>();
+    _dconv_input = _g.nodes()->create<luci::CircleConst>();
+    _dconv_filter = _g.nodes()->create<luci::CircleConst>();
+    _dconv_bias = _g.nodes()->create<luci::CircleConst>();
+
+    _dconv->dtype(loco::DataType::FLOAT32);
+    _dconv->padding(luci::Padding::VALID);
+    _dconv->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _dconv->input(_dconv_input);
+    _dconv->filter(_dconv_filter);
+    _dconv->bias(_dconv_bias);
+    _dconv->shape({1, 4, 4, 1});
+    _dconv->stride()->h(1);
+    _dconv->stride()->w(1);
+    _dconv->depthMultiplier(1);
+
+    _dconv_input->dtype(loco::DataType::FLOAT32);
+    _dconv_input->shape({1, 4, 4, 1});
+    _dconv_input->size<loco::DataType::FLOAT32>(16);
+
+    _dconv_filter->dtype(loco::DataType::FLOAT32);
+    _dconv_filter->shape({1, 1, 1, 1});
+    _dconv_filter->size<loco::DataType::FLOAT32>(1);
+
+    _dconv_bias->dtype(loco::DataType::FLOAT32);
+    _dconv_bias->shape({1});
+    _dconv_bias->size<loco::DataType::FLOAT32>(1);
+
+    _output->from(_dconv);
+  }
+
+protected:
+  void init() final {}
+
+protected:
+  loco::Node *createFoldedPattern() final { return nullptr; }
+
+protected:
+  luci::CircleConst *getFoldedPattern() final
+  {
+    return loco::must_cast<luci::CircleConst *>(_output->from());
+  }
+
+protected:
+  luci::CircleDepthwiseConv2D *_dconv = nullptr;
+  luci::CircleConst *_dconv_input = nullptr;
+  luci::CircleConst *_dconv_filter = nullptr;
+  luci::CircleConst *_dconv_bias = nullptr;
+};
+
+} // namespace
+
+TEST(FoldDepthwiseConv2DPass, name)
+{
+  luci::FoldDepthwiseConv2DPass pass;
+  auto const name = pass.name();
+  ASSERT_NE(nullptr, name);
+}
+
+TEST_F(FoldDepthwiseConv2DTest, fold_depthwise_conv2d)
+{
+  for (uint32_t i = 0; i < 16; ++i)
+    _dconv_input->at<loco::DataType::FLOAT32>(i) = 0.5;
+  _dconv_filter->at<loco::DataType::FLOAT32>(0) = 0.5;
+
+  luci::FoldDepthwiseConv2DPass pass;
+  ASSERT_TRUE(pass.run(&_g));
+
+  auto folded_const = getFoldedPattern();
+  EXPECT_EQ(folded_const->dtype(), loco::DataType::FLOAT32);
+  EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(0), 0.25,
+              std::numeric_limits<float>::min());
+  EXPECT_NEAR(folded_const->at<loco::DataType::FLOAT32>(15), 0.25,
+              std::numeric_limits<float>::min());
+}
+
+TEST_F(FoldDepthwiseConv2DTest, fold_non_constant_NEG)
+{
+  _dconv->input(_input);
+
+  luci::FoldDepthwiseConv2DPass pass;
+  ASSERT_FALSE(pass.run(&_g));
+}
diff --git a/compiler/luci/pass/src/ForceQuantParamPass.cpp b/compiler/luci/pass/src/ForceQuantParamPass.cpp
new file mode 100644 (file)
index 0000000..32d482f
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForceQuantParamPass.h"
+#include "luci/Profile/CircleNodeID.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Log.h>
+
+namespace luci
+{
+
+namespace
+{
+
+void set_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+  assert(node); // FIX_CALLER_UNLESS
+
+  auto quantparam = std::make_unique<CircleQuantParam>();
+  quantparam->scale.push_back(scale);
+  quantparam->zerop.push_back(zp);
+
+  node->quantparam(std::move(quantparam));
+}
+
+} // namespace
+
+bool ForceQuantParamPass::run(loco::Graph *g)
+{
+  LOGGER(l);
+  INFO(l) << "ForceQuantParamPass Start" << std::endl;
+
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto const cnode = loco::must_cast<CircleNode *>(node);
+    auto const name = cnode->name();
+    auto target = std::find(_tensors.begin(), _tensors.end(), name);
+    if (target == _tensors.end())
+      continue;
+
+    auto index = target - _tensors.begin();
+    auto scale = _scales[index];
+    auto zp = _zerops[index];
+    set_qparam(cnode, scale, zp);
+
+    _tensors.erase(_tensors.begin() + index);
+    _scales.erase(_scales.begin() + index);
+    _zerops.erase(_zerops.begin() + index);
+  }
+
+  if (_tensors.size() > 0)
+  {
+    std::string msg;
+    for (auto const &t : _tensors)
+      msg += "Tensor does not exist: " + t + ".\n";
+    msg += "Please check tensor name.\n";
+    throw std::runtime_error(msg);
+  }
+
+  INFO(l) << "ForceQuantParamPass End" << std::endl;
+  return false; // one time run
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/ForceQuantParamPass.test.cpp b/compiler/luci/pass/src/ForceQuantParamPass.test.cpp
new file mode 100644 (file)
index 0000000..a9da7c2
--- /dev/null
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/ForceQuantParamPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using TensorVector = luci::ForceQuantParamPass::TensorVector;
+using ScaleVector = luci::ForceQuantParamPass::ScaleVector;
+using ZPVector = luci::ForceQuantParamPass::ZPVector;
+
+std::unique_ptr<luci::CircleQuantParam> make_qparam(float scale, int64_t zp)
+{
+  auto qparam = std::make_unique<luci::CircleQuantParam>();
+  qparam->scale.push_back(scale);
+  qparam->zerop.push_back(zp);
+
+  return std::move(qparam);
+}
+
+bool check_per_tensor_qparam(luci::CircleNode *node, float scale, int64_t zp)
+{
+  assert(node); // FIX_CALLER_UNLESS
+
+  auto qparam = node->quantparam();
+  if (qparam->scale.size() != 1)
+    return false;
+
+  if (qparam->scale[0] != scale)
+    return false;
+
+  if (qparam->zerop.size() != 1)
+    return false;
+
+  if (qparam->zerop[0] != zp)
+    return false;
+
+  return true;
+}
+
+/**
+ *  Graph with a single input and a single output.
+ *
+ *             [Input]
+ *                |
+ *           (graph body) -> implemented by insertGraphBody()
+ *                |
+ *             [Output]
+ *
+ */
+class SISOGraph
+{
+public:
+  SISOGraph() = default;
+
+public:
+  void init()
+  {
+    input = g.nodes()->create<luci::CircleInput>();
+    output = g.nodes()->create<luci::CircleOutput>();
+    input->name("input");
+    output->name("output");
+
+    auto graph_input = g.inputs()->create();
+    input->index(graph_input->index());
+    auto graph_output = g.outputs()->create();
+    output->index(graph_output->index());
+
+    graph_input->dtype(loco::DataType::U8);
+    input->dtype(loco::DataType::U8);
+    output->dtype(loco::DataType::U8);
+    graph_output->dtype(loco::DataType::U8);
+
+    input->quantparam(make_qparam(0.1, 11));
+    output->quantparam(make_qparam(0.2, 12));
+
+    uint32_t channel_size = 16;
+    graph_input->shape({1, channel_size, 4, 4});
+    input->shape({1, channel_size, 4, 4});
+    output->shape({1, channel_size, 4, 4});
+    graph_output->shape({1, channel_size, 4, 4});
+
+    auto graph_body = insertGraphBody(input);
+    output->from(graph_body);
+  }
+
+  virtual ~SISOGraph() = default;
+
+protected:
+  virtual loco::Node *insertGraphBody(loco::Node *input) = 0;
+
+public:
+  loco::Graph g;
+  luci::CircleInput *input = nullptr;
+  luci::CircleOutput *output = nullptr;
+};
+
+class AddGraph final : public SISOGraph
+{
+protected:
+  loco::Node *insertGraphBody(loco::Node *input) override
+  {
+    add = g.nodes()->create<luci::CircleAdd>();
+    beta = g.nodes()->create<luci::CircleConst>();
+
+    add->dtype(loco::DataType::U8);
+    beta->dtype(loco::DataType::U8);
+    add->quantparam(make_qparam(0.1, 11));
+    beta->quantparam(make_qparam(0.2, 12));
+
+    uint32_t channel_size = 16;
+    add->shape({1, 4, 4, channel_size});
+    beta->shape({1, 1, 1, channel_size});
+
+    beta->size<loco::DataType::U8>(channel_size);
+    for (uint32_t i = 0; i < channel_size; i++)
+    {
+      beta->at<loco::DataType::U8>(i) = i;
+    }
+
+    add->x(input);
+    add->y(beta);
+
+    add->name("add");
+    beta->name("beta");
+
+    return add;
+  }
+
+public:
+  luci::CircleAdd *add = nullptr;
+  luci::CircleConst *beta = nullptr;
+};
+
+} // namespace
+
+TEST(ForceQuantParamPassTest, simple)
+{
+  TensorVector tensors{"input", "add"};
+  ScaleVector scales{2.0, 3.0};
+  ZPVector zerops{4, 8};
+
+  luci::ForceQuantParamPass pass(tensors, scales, zerops);
+
+  AddGraph g;
+  g.init();
+
+  pass.run(&g.g);
+
+  EXPECT_TRUE(check_per_tensor_qparam(g.input, 2.0, 4));
+  EXPECT_TRUE(check_per_tensor_qparam(g.add, 3.0, 8));
+}
+
+TEST(ForceQuantParamPassTest, name_mismatch_NEG)
+{
+  TensorVector tensors{"no_exist"};
+  ScaleVector scales{2.0};
+  ZPVector zerops{4};
+
+  luci::ForceQuantParamPass pass(tensors, scales, zerops);
+
+  AddGraph g;
+  g.init();
+
+  EXPECT_THROW(pass.run(&g.g), std::runtime_error);
+}
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.cpp
new file mode 100644 (file)
index 0000000..97a962c
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Service/Nodes/CircleConst.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+/**
+ *  Fuse Add to FullyConnected if the added value is a channel(last dimension)-wise constant
+ *
+ *  BEFORE
+ *                |
+ *      [CircleFullyConnected]
+ *                |
+ *           [CircleAdd]
+ *                |
+ *
+ *  AFTER
+ *                |
+ *       [CircleFullyConnected]   [CircleAdd] (dead)
+ *                |
+ *
+ */
+bool fuse_add_with_fc(luci::CircleFullyConnected *fc)
+{
+  if (not fc)
+    return false;
+
+  if (fc->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  if (fc->fusedActivationFunction() != luci::FusedActFunc::NONE)
+    return false;
+
+  auto weights = dynamic_cast<luci::CircleConst *>(fc->weights());
+  if (not weights)
+    return false;
+
+  // Get add node
+  auto fc_output = loco::succs(fc);
+  if (fc_output.size() != 1)
+    return false;
+
+  auto add = dynamic_cast<luci::CircleAdd *>(*fc_output.begin());
+  if (not add)
+    return false;
+  if (add->dtype() != loco::DataType::FLOAT32)
+    return false;
+
+  // Get addition
+  auto addition = add->x() == fc ? dynamic_cast<luci::CircleConst *>(add->y())
+                                 : dynamic_cast<luci::CircleConst *>(add->x());
+
+  // Non-const addition
+  if (not addition)
+    return false;
+
+  auto rank = addition->rank();
+  // TODO Support scalar addition
+  if (rank == 0)
+    return false;
+
+  for (uint32_t i = 0; i < rank - 1; i++)
+  {
+    if (addition->dim(i).value() != 1)
+      return false;
+  }
+  // Check that the last dimension of the addition is the same as the number of neurons of FC
+  if (not(addition->dim(rank - 1) == weights->dim(0)))
+    return false;
+
+  auto fused_bias = luci::clone(addition);
+
+  // Add existing bias values
+  if (auto const_bias = dynamic_cast<luci::CircleConst *>(fc->bias()))
+  {
+    assert(const_bias->dtype() == loco::DataType::FLOAT32);
+
+    auto bias_size = fused_bias->size<loco::DataType::FLOAT32>();
+    assert(bias_size == const_bias->size<loco::DataType::FLOAT32>());
+    for (uint32_t i = 0; i < bias_size; i++)
+      fused_bias->at<loco::DataType::FLOAT32>(i) += const_bias->at<loco::DataType::FLOAT32>(i);
+  }
+
+  fc->bias(fused_bias);
+  fc->fusedActivationFunction(add->fusedActivationFunction());
+
+  // set origin
+  luci::add_origin(fc, luci::get_origin(add));
+
+  replace(add).with(fc);
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+bool FuseAddWithFullyConnectedPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    auto fc = dynamic_cast<luci::CircleFullyConnected *>(node);
+    if (not fc)
+      continue;
+
+    if (fuse_add_with_fc(fc))
+      changed = true;
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp b/compiler/luci/pass/src/FuseAddWithFullyConnectedPass.test.cpp
new file mode 100644 (file)
index 0000000..4cc2eb5
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/FuseAddWithFullyConnectedPass.h"
+
+#include <luci/IR/CircleNodes.h>
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+// TODO Reduce duplicate code in ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+                                     const std::vector<uint32_t> &shape,
+                                     const std::vector<T> &values)
+{
+  auto node = g->nodes()->create<luci::CircleConst>();
+  node->dtype(dtype);
+  node->rank(shape.size());
+
+  uint32_t size = 1;
+  for (uint32_t i = 0; i < shape.size(); ++i)
+  {
+    node->dim(i) = shape.at(i);
+    size *= shape.at(i);
+  }
+  node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT)                          \
+  {                                              \
+    node->size<DT>(size);                        \
+    for (uint32_t i = 0; i < values.size(); ++i) \
+      node->at<DT>(i) = values[i];               \
+  }
+
+  switch (dtype)
+  {
+    case loco::DataType::U8:
+      INIT_VALUES(loco::DataType::U8);
+      break;
+    case loco::DataType::S16:
+      INIT_VALUES(loco::DataType::S16);
+      break;
+    case loco::DataType::S32:
+      INIT_VALUES(loco::DataType::S32);
+      break;
+    case loco::DataType::FLOAT32:
+      INIT_VALUES(loco::DataType::FLOAT32)
+      break;
+    default:
+      INTERNAL_EXN("create_const_node called with unsupported type");
+      break;
+  }
+  return node;
+}
+
+/**
+ *  Simple graph for test
+ *
+ *  BEFORE
+ *
+ *         [FC]
+ *           |
+ *     [Add w/ Relu]
+ *
+ *  AFTER
+ *
+ *      [FC w/ Relu] (bias updated)
+ *
+ */
+class FCAddGraphlet
+{
+public:
+  FCAddGraphlet() = default;
+
+  void init(loco::Graph *g)
+  {
+    std::vector<float> weights_val(16 * 4);
+    _fc_f = create_const_node(g, loco::DataType::FLOAT32, {16, 4}, weights_val);
+
+    std::vector<float> bias_val(16);
+    _fc_b = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, bias_val);
+
+    _fc = g->nodes()->create<luci::CircleFullyConnected>();
+    _fc->weights(_fc_f);
+    _fc->bias(_fc_b);
+    _fc->fusedActivationFunction(luci::FusedActFunc::NONE);
+    _fc->dtype(loco::DataType::FLOAT32);
+    _fc->shape({1, 16});
+    _fc->name("fc");
+
+    std::vector<float> addition_val;
+    for (uint32_t i = 0; i < 16; i++)
+      addition_val.push_back(static_cast<float>(i));
+    _add_c = create_const_node(g, loco::DataType::FLOAT32, {1, 16}, addition_val);
+
+    _add = g->nodes()->create<luci::CircleAdd>();
+    _add->x(_fc);
+    _add->y(_add_c);
+    _add->fusedActivationFunction(luci::FusedActFunc::RELU);
+    _add->dtype(loco::DataType::FLOAT32);
+    _add->shape({1, 16});
+    _add->name("add");
+  }
+
+public:
+  luci::CircleFullyConnected *fc() { return _fc; }
+
+protected:
+  luci::CircleFullyConnected *_fc = nullptr;
+  luci::CircleAdd *_add = nullptr;
+  luci::CircleConst *_fc_f = nullptr;
+  luci::CircleConst *_fc_b = nullptr;
+  luci::CircleConst *_add_c = nullptr;
+};
+
+class FuseAddWithFCTestGraph : public TestIOGraph, public FCAddGraphlet
+{
+public:
+  FuseAddWithFCTestGraph() = default;
+
+  void init(void)
+  {
+    TestIOGraph::init({1, 4}, {1, 16});
+    FCAddGraphlet::init(g());
+
+    _fc->input(input());
+
+    output()->from(_add);
+  }
+};
+
+class FuseAddWithFullyConnectedPassTest : public ::testing::Test
+{
+public:
+  FuseAddWithFCTestGraph g;
+  luci::FuseAddWithFullyConnectedPass pass;
+};
+
+} // namespace
+
+TEST_F(FuseAddWithFullyConnectedPassTest, simple_test)
+{
+  g.init();
+
+  auto ret = pass.run(g.g());
+  EXPECT_EQ(true, ret);
+
+  auto fc = dynamic_cast<luci::CircleFullyConnected *>(g.output()->from());
+  EXPECT_NE(nullptr, fc);
+
+  auto bias = loco::must_cast<luci::CircleConst *>(g.fc()->bias());
+  for (uint32_t i = 0; i < bias->size<loco::DataType::FLOAT32>(); i++)
+  {
+    EXPECT_EQ(i, bias->at<loco::DataType::FLOAT32>(i));
+  }
+}
index 10c1135..b1cb7a4 100644 (file)
@@ -73,7 +73,13 @@ struct PropagateQuantParam final : public luci::CircleNodeMutableVisitor<bool>
     return copy_qparam(input_node, node);
   }
 
-  // TODO : Add more Ops (e.g., Transpose)
+  bool visit(luci::CircleTranspose *node)
+  {
+    auto input_node = loco::must_cast<luci::CircleNode *>(node->a());
+    return copy_qparam(input_node, node);
+  }
+
+  // TODO : Add more Ops (e.g., layout-changing Ops)
 };
 
 } // namespace
index e99c7b3..c8ad87e 100644 (file)
@@ -358,7 +358,7 @@ bool QuantizeDequantizeWeightsPass::run(loco::Graph *g)
   // Quantize weights
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeDequantizeWeights qw(_input_dtype, _output_dtype, _granularity);
+    QuantizeDequantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     circle_node->accept(&qw);
   }
index 6afc208..be81732 100644 (file)
@@ -609,6 +609,20 @@ struct QuantizeSpecialActivation final : public luci::CircleNodeMutableVisitor<v
     set_act_qparam(node, i_scale, i_zp);
   }
 
+  void visit(luci::CircleSplitVOut *node)
+  {
+    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+    auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+    auto i_qparam = input->quantparam();
+    assert(i_qparam);
+    assert(i_qparam->scale.size() == 1); // FIX_CALLER_UNLESS
+    assert(i_qparam->zerop.size() == 1); // FIX_CALLER_UNLESS
+    auto i_scale = i_qparam->scale[0];
+    auto i_zp = i_qparam->zerop[0];
+
+    set_act_qparam(node, i_scale, i_zp);
+  }
+
   void visit(luci::CircleUnpackOut *node)
   {
     auto unpack = loco::must_cast<luci::CircleUnpack *>(node->input());
@@ -1157,6 +1171,7 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
     case luci::CircleOpcode::REVERSE_SEQUENCE:
     case luci::CircleOpcode::SLICE:
     case luci::CircleOpcode::SPACE_TO_BATCH_ND:
+    case luci::CircleOpcode::SPLIT_V:
     case luci::CircleOpcode::STRIDED_SLICE:
     case luci::CircleOpcode::SUM:
     case luci::CircleOpcode::TILE:
@@ -1176,6 +1191,7 @@ void quantize_const_inputs(luci::CircleNode *node, loco::DataType output_type)
     case luci::CircleOpcode::DIV:
     case luci::CircleOpcode::ELU:
     case luci::CircleOpcode::EQUAL:
+    case luci::CircleOpcode::EXP:
     case luci::CircleOpcode::FLOOR:
     case luci::CircleOpcode::FLOOR_DIV:
     case luci::CircleOpcode::GREATER:
@@ -1385,7 +1401,8 @@ void propagate_pad_v2_quantparam(luci::CirclePadV2 *pad_v2, loco::DataType quant
     auto pad_v2_input = loco::must_cast<luci::CircleNode *>(pad_v2->arg(0));
     overwrite_quantparam(pad_v2_input, pad_v2);
 
-    auto const_value_node = dynamic_cast<luci::CircleConst *>(pad_v2->arg(2));
+    auto const_value_node = loco::must_cast<luci::CircleConst *>(
+      pad_v2->arg(2)); // FIX ignore_pad_v2_const_quantization UNLESS
     auto new_const = luci::clone(const_value_node);
 
     const auto pad_v2_input_qparam = pad_v2_input->quantparam();
@@ -1458,7 +1475,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
   // Quantize activation
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeActivation qa(_input_dtype, _output_dtype);
+    QuantizeActivation qa(_input_model_dtype, _output_model_dtype);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     circle_node->accept(&qa);
   }
@@ -1466,7 +1483,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
   // Quantize weights
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeWeights qw(_input_dtype, _output_dtype, _granularity);
+    QuantizeWeights qw(_input_model_dtype, _output_model_dtype, _granularity);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     circle_node->accept(&qw);
   }
@@ -1474,7 +1491,7 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
   // Quantize bias
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeBias qb(_input_dtype, _output_dtype, _granularity);
+    QuantizeBias qb(_input_model_dtype, _output_model_dtype, _granularity);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     circle_node->accept(&qb);
   }
@@ -1491,20 +1508,20 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
     // (2) concat has no fused activation function
     // (3) the input is not concatenation Op
     // (4) the input is not produced to Ops other than concat
-    propagate_concat_quantparam(concat, _output_dtype);
+    propagate_concat_quantparam(concat, _output_model_dtype);
   }
 
   // Quantize const inputs other than weights and bias
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
-    quantize_const_inputs(circle_node, _output_dtype);
+    quantize_const_inputs(circle_node, _output_model_dtype);
   }
 
   // Update qparam of output of special Ops
   for (auto node : loco::active_nodes(loco::output_nodes(g)))
   {
-    QuantizeSpecialActivation qsa(_input_dtype, _output_dtype);
+    QuantizeSpecialActivation qsa(_input_model_dtype, _output_model_dtype);
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     circle_node->accept(&qsa);
   }
@@ -1514,11 +1531,11 @@ bool QuantizeWithMinMaxPass::run(loco::Graph *g)
   for (auto node : loco::output_nodes(g))
   {
     auto circle_node = loco::must_cast<luci::CircleOutput *>(node);
-    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_dtype)
+    if (static_cast<luci::CircleNode *>(circle_node->from())->dtype() == _output_model_dtype)
     {
-      circle_node->dtype(_output_dtype);
+      circle_node->dtype(_output_model_dtype);
       auto graph_output = graph_outputs->at(circle_node->index());
-      graph_output->dtype(_output_dtype);
+      graph_output->dtype(_output_model_dtype);
     }
   }
 
index b8cc099..3a6d86c 100644 (file)
@@ -189,6 +189,12 @@ void set_minmax_to_non_const(loco::Graph *g, float min, float max)
     if (split_node != nullptr)
       continue;
 
+    // Min/Max is not recorded for SplitV
+    // See MinMaxObserver.cpp in record_minmax module
+    auto splitv_node = dynamic_cast<luci::CircleSplitV *>(node);
+    if (splitv_node != nullptr)
+      continue;
+
     auto circle_node = loco::must_cast<luci::CircleNode *>(node);
     auto qparam = std::make_unique<luci::CircleQuantParam>();
     {
@@ -410,6 +416,38 @@ private:
   luci::CircleConst *_split_dim = nullptr;
 };
 
+class SplitVTestGraph final : public luci::test::TestIOGraph
+{
+public:
+  void init(void)
+  {
+    TestIOGraph::init({1, 32}, {32});
+    _size_splits = create_dummy_const<Type::S32>(g(), {1});
+    _split_dim = create_dummy_const<Type::S32>(g(), {1});
+    _splitv = g()->nodes()->create<luci::CircleSplitV>();
+    {
+      _splitv->input(input());
+      _splitv->size_splits(_size_splits);
+      _splitv->split_dim(_split_dim);
+    }
+    _splitv_o1 = g()->nodes()->create<luci::CircleSplitVOut>();
+    {
+      _splitv_o1->input(_splitv);
+      _splitv_o1->index(0);
+    }
+
+    output()->from(_splitv_o1);
+
+    set_minmax_to_non_const(g(), -1, 1);
+  }
+
+private:
+  luci::CircleSplitV *_splitv = nullptr;
+  luci::CircleSplitVOut *_splitv_o1 = nullptr;
+  luci::CircleConst *_size_splits = nullptr;
+  luci::CircleConst *_split_dim = nullptr;
+};
+
 class StridedSliceTestGraph final : public SimpleTestGraph
 {
 public:
@@ -1312,6 +1350,30 @@ TEST(QuantizedModelVerifierTest, Split_wrong_granularity_NEG)
   SUCCEED();
 }
 
+TEST(QuantizedModelVerifierTest, SplitV)
+{
+  TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_GRAPH(SplitVTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_GRAPH(SplitVTestGraph, Type::S16, Granularity::ChannelWise);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV_wrong_type_NEG)
+{
+  TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::LayerWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::U8, Granularity::ChannelWise, Type::S16);
+  TEST_WITH_WRONG_TYPE(SplitVTestGraph, Type::S16, Granularity::ChannelWise, Type::U8);
+  SUCCEED();
+}
+
+TEST(QuantizedModelVerifierTest, SplitV_wrong_granularity_NEG)
+{
+  TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::LayerWise);
+  TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::U8, Granularity::ChannelWise);
+  TEST_WITH_WRONG_GRANULARITY(SplitVTestGraph, Type::S16, Granularity::ChannelWise);
+  SUCCEED();
+}
+
 TEST(QuantizedModelVerifierTest, StridedSlice)
 {
   TEST_WITH_GRAPH(StridedSliceTestGraph, Type::U8, Granularity::LayerWise);
index 1737e5d..9f7e2f1 100644 (file)
 
 #include "luci/Pass/ResolveCustomOpAddPass.h"
 
-#include "flatbuffers/flexbuffers.h"
-
 #include <luci/IR/CircleNodes.h>
 #include <luci/IR/AttrFusedActFunc.h>
 #include <luci/Profile/CircleNodeOrigin.h>
 
+#include <flatbuffers/flexbuffers.h>
+
 namespace
 {
 
index 5e9466a..7ebd7a4 100644 (file)
 
 #include "luci/Pass/ResolveCustomOpBatchMatMulPass.h"
 
-#include "flatbuffers/flexbuffers.h"
-
 #include <luci/IR/CircleNodes.h>
 #include <luci/Profile/CircleNodeOrigin.h>
 
+#include <flatbuffers/flexbuffers.h>
+
 namespace
 {
 
index 435016f..7ef61c2 100644 (file)
 
 #include <luci/IR/CircleNodes.h>
 
-#include "flatbuffers/flatbuffers.h"
-#include "flatbuffers/flexbuffers.h"
-
 #include <luci/test/TestIOGraph.h>
 
 #include <gtest/gtest.h>
+#include <flatbuffers/flatbuffers.h>
+#include <flatbuffers/flexbuffers.h>
 
 namespace
 {
index 2167780..1e8f681 100644 (file)
@@ -16,7 +16,6 @@
 
 #include "luci/Pass/ResolveCustomOpMatMulPass.h"
 
-#include "flatbuffers/flexbuffers.h"
 #include <loco/IR/DataTypeTraits.h>
 
 #include <luci/IR/CircleNodes.h>
@@ -25,6 +24,8 @@
 #include <loco.h>
 #include <oops/InternalExn.h>
 
+#include <flatbuffers/flexbuffers.h>
+
 namespace
 {
 
index d78a587..f37f277 100644 (file)
@@ -16,7 +16,6 @@
 
 #include "luci/Pass/ResolveCustomOpMaxPoolWithArgmaxPass.h"
 
-#include "flatbuffers/flexbuffers.h"
 #include <loco/IR/DataTypeTraits.h>
 
 #include <luci/IR/CircleNodes.h>
@@ -25,6 +24,8 @@
 #include <loco.h>
 #include <oops/InternalExn.h>
 
+#include <flatbuffers/flexbuffers.h>
+
 namespace
 {
 
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.cpp
new file mode 100644 (file)
index 0000000..9cba9a9
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+
+#include <loco.h>
+
+#include <luci/IR/CircleNodes.h>
+#include <luci/Profile/CircleNodeOrigin.h>
+
+namespace
+{
+
+void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
+{
+  auto q = src->quantparam();
+  if (q == nullptr)
+    dst->quantparam(nullptr);
+  else
+    dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
+}
+
+// SplitV is substituted to Split if the contents of size_splits are all same
+// For example,
+// size_splits = [32, 32] -> substitute
+// size_splits = [31, 33] -> do not substitute
+bool resolve_splitv(luci::CircleSplitV *sv)
+{
+  auto size_splits = dynamic_cast<luci::CircleConst *>(sv->size_splits());
+  if (not size_splits)
+    return false;
+
+  if (size_splits->dtype() != loco::DataType::S32)
+    return false;
+
+  auto num_split = size_splits->size<loco::DataType::S32>();
+  if (static_cast<int32_t>(num_split) != sv->num_split())
+    return false;
+
+  if (num_split < 1)
+    return false;
+
+  // Check the contents of size_splits are all same
+  auto first_size = size_splits->at<loco::DataType::S32>(0);
+  for (uint32_t i = 1; i < num_split; i++)
+  {
+    if (first_size != size_splits->at<loco::DataType::S32>(i))
+      return false;
+  }
+
+  auto graph = sv->graph();
+  auto split_node = graph->nodes()->create<luci::CircleSplit>();
+  split_node->input(sv->input());
+  split_node->split_dim(sv->split_dim());
+  split_node->num_split(sv->num_split());
+  split_node->name(sv->name());
+  copy_quantparam(split_node, sv);
+  luci::add_origin(split_node, luci::get_origin(sv));
+
+  auto succs = loco::succs(sv);
+  for (auto succ : succs)
+  {
+    auto svo = loco::must_cast<luci::CircleSplitVOut *>(succ);
+    auto so_node = graph->nodes()->create<luci::CircleSplitOut>();
+    so_node->input(split_node);
+    so_node->index(svo->index());
+    so_node->name(svo->name());
+    copy_quantparam(so_node, svo);
+    luci::add_origin(so_node, luci::get_origin(svo));
+
+    replace(svo).with(so_node);
+  }
+
+  return true;
+}
+
+} // namespace
+
+namespace luci
+{
+
+/**
+ *  EXAMPLE (SplitV with num_split = 2)
+ *
+ *  BEFORE
+ *              [CircleNode]
+ *                   |
+ *             [CircleSplitV] (size_splits and split_dim inputs omitted from the diagram)
+ *                /      \
+ *   [CircleSplitVOut]  [CircleSplitVOut]
+ *            |                 |
+ *       [CircleNode]     [CircleNode]
+ *
+ *  AFTER
+ *                    [CircleNode]
+ *                     /         \
+ *             [CircleSplit]    [CircleSplitV] (dead)
+ *                /      \               \
+ *   [CircleSplitOut]  [CircleSplitOut]  [CircleSplitVOut] * 2 (dead)
+ *            |                 |
+ *       [CircleNode]     [CircleNode]
+ */
+bool SubstituteSplitVToSplitPass::run(loco::Graph *g)
+{
+  bool changed = false;
+  for (auto node : loco::active_nodes(loco::output_nodes(g)))
+  {
+    if (auto sv = dynamic_cast<luci::CircleSplitV *>(node))
+    {
+      if (resolve_splitv(sv))
+        changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace luci
diff --git a/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp b/compiler/luci/pass/src/SubstituteSplitVToSplitPass.test.cpp
new file mode 100644 (file)
index 0000000..6e30103
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Pass/SubstituteSplitVToSplitPass.h"
+
+#include <luci/test/TestIOGraph.h>
+
+#include <gtest/gtest.h>
+
+namespace
+{
+
+using namespace luci::test;
+
+const int N = 1;
+const int C = 32;
+const int H = 8;
+const int W = 8;
+
+// TODO Reduce code duplicated with ResolveCustomOpMatMulPass.cpp
+template <typename T>
+luci::CircleConst *create_const_node(loco::Graph *g, const loco::DataType dtype,
+                                     const std::vector<uint32_t> &shape,
+                                     const std::vector<T> &values)
+{
+  auto node = g->nodes()->create<luci::CircleConst>();
+  node->dtype(dtype);
+  node->rank(shape.size());
+
+  uint32_t size = 1;
+  for (uint32_t i = 0; i < shape.size(); ++i)
+  {
+    node->dim(i) = shape.at(i);
+    size *= shape.at(i);
+  }
+  node->shape_status(luci::ShapeStatus::VALID);
+
+#define INIT_VALUES(DT)                          \
+  {                                              \
+    node->size<DT>(size);                        \
+    for (uint32_t i = 0; i < values.size(); ++i) \
+      node->at<DT>(i) = values[i];               \
+  }
+
+  switch (dtype)
+  {
+    case loco::DataType::U8:
+      INIT_VALUES(loco::DataType::U8);
+      break;
+    case loco::DataType::S16:
+      INIT_VALUES(loco::DataType::S16);
+      break;
+    case loco::DataType::S32:
+      INIT_VALUES(loco::DataType::S32);
+      break;
+    case loco::DataType::FLOAT32:
+      INIT_VALUES(loco::DataType::FLOAT32)
+      break;
+    default:
+      INTERNAL_EXN("create_const_node called with unsupported type");
+      break;
+  }
+  return node;
+}
+/**
+ *  graph having SplitV operator
+ *
+ *                [CircleInput]
+ *                      |
+ *                [CircleSplitV]
+ *                     /  \
+ *      [CircleSplitVOut] [CircleSplitVOut]
+ *             |                   |
+ *       [CircleOutput]     [CircleOutput]
+ */
+class SplitVGraphlet
+{
+public:
+  SplitVGraphlet() = default;
+
+public:
+  void init(loco::Graph *g)
+  {
+    const std::vector<int32_t> splits{16, 16};
+    auto size_splits = create_const_node(g, loco::DataType::S32, {2}, splits);
+
+    const std::vector<int32_t> dim{3};
+    auto split_dim = create_const_node(g, loco::DataType::S32, {1}, dim);
+
+    _sv = g->nodes()->create<luci::CircleSplitV>();
+    _sv->size_splits(size_splits);
+    _sv->split_dim(split_dim);
+    _sv->num_split(2);
+    _sv->name("SplitV");
+
+    _svo1 = g->nodes()->create<luci::CircleSplitVOut>();
+    _svo1->input(_sv);
+    _svo1->index(0);
+    _svo1->name("SplitV0");
+
+    _svo2 = g->nodes()->create<luci::CircleSplitVOut>();
+    _svo2->input(_sv);
+    _svo2->index(1);
+    _svo2->name("SplitV1");
+  }
+
+public:
+  luci::CircleSplitV *split_v() { return _sv; }
+  luci::CircleSplitVOut *split_vo1() { return _svo1; }
+  luci::CircleSplitVOut *split_vo2() { return _svo2; }
+
+protected:
+  luci::CircleSplitV *_sv = nullptr;
+  luci::CircleSplitVOut *_svo1 = nullptr;
+  luci::CircleSplitVOut *_svo2 = nullptr;
+};
+
+class SplitVGraph : public TestIsGraphlet<1>, public TestOsGraphlet<2>, public SplitVGraphlet
+{
+public:
+  SplitVGraph() = default;
+
+  void init(void)
+  {
+    TestIsGraphlet<1>::init(g(), {{N, C, H, W}});
+    TestOsGraphlet<2>::init(g(), {{N, C, H / 2, W / 2}, {N, C, H / 2, W / 2}});
+    SplitVGraphlet::init(g());
+
+    split_v()->input(input(0));
+
+    output(0)->from(split_vo1());
+    output(1)->from(split_vo2());
+  }
+};
+
+class SubstituteSplitVToSplitPassTest : public ::testing::Test
+{
+public:
+  SplitVGraph g;
+  luci::SubstituteSplitVToSplitPass pass;
+};
+
+} // namespace
+
+/**
+ *  Optimized graph looks like below.
+ *
+ *                [CircleInput]
+ *                      |
+ *                [CircleSplit]
+ *                     /  \
+ *      [CircleSplitOut] [CircleSplitOut]
+ *             |                 |
+ *       [CircleOutput]   [CircleOutput]
+ */
+TEST_F(SubstituteSplitVToSplitPassTest, simple_test)
+{
+  g.init();
+
+  auto ret = pass.run(g.g());
+  EXPECT_EQ(true, ret);
+
+  auto so1 = dynamic_cast<luci::CircleSplitOut *>(g.output(0)->from());
+  EXPECT_NE(nullptr, so1);
+
+  auto so2 = dynamic_cast<luci::CircleSplitOut *>(g.output(1)->from());
+  EXPECT_NE(nullptr, so2);
+
+  EXPECT_EQ(so1->input(), so2->input());
+
+  auto s = dynamic_cast<luci::CircleSplit *>(so1->input());
+  EXPECT_NE(nullptr, s);
+
+  auto input = dynamic_cast<luci::CircleInput *>(s->input());
+  EXPECT_NE(nullptr, input);
+}
+
+TEST_F(SubstituteSplitVToSplitPassTest, wrong_condition_NEG)
+{
+  g.init();
+
+  g.split_v()->num_split(3); // Wrong num_split
+  auto ret = pass.run(g.g());
+
+  EXPECT_EQ(false, ret);
+}
index 74be86a..f487637 100644 (file)
@@ -76,6 +76,18 @@ std::vector<uint32_t> node_shape(const luci::CircleNode *input)
 }
 
 /**
+ * @brief copy quantparam of src to dst
+ */
+void copy_quantparam(luci::CircleNode *dst, const luci::CircleNode *src)
+{
+  auto q = src->quantparam();
+  if (q == nullptr)
+    dst->quantparam(nullptr);
+  else
+    dst->quantparam(std::make_unique<luci::CircleQuantParam>(*q));
+}
+
+/**
  * @brief return CircleConst ptr with values of new_shape
  */
 luci::CircleConst *create_shape_const(loco::Graph *graph, const std::vector<uint32_t> &new_shape)
@@ -130,6 +142,7 @@ bool substitute_squeeze_to_reshape(luci::CircleSqueeze *squeeze)
   auto graph = squeeze->graph();
   auto reshape = graph->nodes()->create<luci::CircleReshape>();
   auto shape_const = create_shape_const(graph, reshape_shape);
+  copy_quantparam(reshape, squeeze);
   reshape->name(name + "/Reshape");
   luci::add_origin(reshape, luci::get_origin(squeeze));
   shape_const->name(name + "/Reshape/shape");
index 1706b9e..bf3ff2e 100644 (file)
@@ -324,6 +324,19 @@ private:
     return true;
   }
 
+  bool visit(const luci::CircleSplitV *node)
+  {
+    // node's output is the input of CircleSplitVOut, thus not quantized
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitVOut *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    return true;
+  }
+
   bool visit(const luci::CircleStridedSlice *node)
   {
     RETURN_FALSE_UNLESS(is_lwq(node));
index 3954bf2..9bc8b31 100644 (file)
@@ -310,6 +310,19 @@ private:
     return true;
   }
 
+  bool visit(const luci::CircleSplitV *node)
+  {
+    // node's output is the input of CircleSplitVOut, thus not quantized
+    RETURN_FALSE_UNLESS(is_lwq(node->input()));
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitVOut *node)
+  {
+    RETURN_FALSE_UNLESS(is_lwq(node));
+    return true;
+  }
+
   bool visit(const luci::CircleStridedSlice *node)
   {
     RETURN_FALSE_UNLESS(is_lwq(node));
index 560abd2..eeec7b8 100644 (file)
@@ -310,6 +310,26 @@ private:
     return true;
   }
 
+  bool visit(const luci::CircleSplitV *node)
+  {
+    // node's output is the input of CircleSplitVOut, thus not quantized
+    RETURN_FALSE_UNLESS(has_type(node->input(), Type::S16))
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitVOut *node)
+  {
+    RETURN_FALSE_UNLESS(has_type(node, Type::S16))
+
+    // SplitVOut has the same qparam with the input of SplitV
+    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+    auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+    RETURN_FALSE_UNLESS(node->quantparam());
+    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+    return true;
+  }
+
   bool visit(const luci::CircleStridedSlice *node)
   {
     RETURN_FALSE_UNLESS(has_type(node, Type::S16))
index 42cd1ce..e7dd1b0 100644 (file)
@@ -317,6 +317,26 @@ private:
     return true;
   }
 
+  bool visit(const luci::CircleSplitV *node)
+  {
+    // node's output is the input of CircleSplitVOut, thus not quantized
+    RETURN_FALSE_UNLESS(has_type(node->input(), Type::U8))
+    return true;
+  }
+
+  bool visit(const luci::CircleSplitVOut *node)
+  {
+    RETURN_FALSE_UNLESS(has_type(node, Type::U8))
+
+    // SplitVOut has the same qparam with the input of SplitV
+    auto splitv = loco::must_cast<luci::CircleSplitV *>(node->input());
+    auto input = loco::must_cast<luci::CircleNode *>(splitv->input());
+    RETURN_FALSE_UNLESS(node->quantparam());
+    RETURN_FALSE_UNLESS(node->quantparam()->scale[0] == input->quantparam()->scale[0]);
+    RETURN_FALSE_UNLESS(node->quantparam()->zerop[0] == input->quantparam()->zerop[0]);
+    return true;
+  }
+
   bool visit(const luci::CircleStridedSlice *node)
   {
     RETURN_FALSE_UNLESS(has_type(node, Type::U8))
diff --git a/compiler/luci/plan/CMakeLists.txt b/compiler/luci/plan/CMakeLists.txt
new file mode 100644 (file)
index 0000000..9ca6dcb
--- /dev/null
@@ -0,0 +1,15 @@
+file(GLOB_RECURSE SOURCES "src/*.cpp")
+
+if (NOT LIBRARY_TYPE)
+    set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_plan ${LIBRARY_TYPE} ${SOURCES})
+target_include_directories(luci_plan PRIVATE src)
+target_include_directories(luci_plan PUBLIC include)
+target_link_libraries(luci_plan PUBLIC loco)
+target_link_libraries(luci_plan PUBLIC luci_lang)
+
+install(TARGETS luci_plan DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+        FILES_MATCHING PATTERN "*.h")
diff --git a/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h b/compiler/luci/plan/include/luci/Plan/CircleNodeExecutionPlan.h
new file mode 100644 (file)
index 0000000..fe966e3
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
+#define __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
+
+#include <luci/IR/CircleNode.h>
+
+#include <utility>
+
+namespace luci
+{
+
+class CircleNodeExecutionPlan
+{
+public:
+  CircleNodeExecutionPlan() = delete;
+
+  CircleNodeExecutionPlan(uint32_t order_in_plan, std::vector<uint32_t> offsets)
+  {
+    _order_in_plan = order_in_plan;
+    _offsets = std::move(offsets);
+  }
+
+  uint32_t order_in_plan(void) const { return _order_in_plan; }
+  void order_in_plan(const uint32_t &order_in_plan) { _order_in_plan = order_in_plan; }
+
+  std::vector<uint32_t> offsets(void) const { return _offsets; }
+  void offsets(const std::vector<uint32_t> &offsets) { _offsets = offsets; }
+
+private:
+  uint32_t _order_in_plan = 0;
+  std::vector<uint32_t> _offsets;
+};
+
+bool has_execution_plan(const luci::CircleNode *circle_node);
+
+void add_execution_plan(luci::CircleNode *circle_node,
+                        const luci::CircleNodeExecutionPlan &execution_plan);
+
+luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node);
+
+} // namespace luci
+
+#endif // __LUCI_CIRCLE_NODE_EXECUTION_PLAN_H__
diff --git a/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp b/compiler/luci/plan/src/CircleNodeExecutionPlan.cpp
new file mode 100644 (file)
index 0000000..a02ebc4
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci/Plan/CircleNodeExecutionPlan.h"
+
+#include <loco.h>
+
+#include <stdexcept>
+#include <utility>
+
+namespace
+{
+
+/**
+ * @brief Set annotation for circle node execution plan
+ * @note  Once CircleExecutionPlanAnnotation is annotated, it should not be changed.
+ *        If CircleExecutionPlanAnnotation is needed to be changed, create
+ *        new CircleExecutionPlanAnnotation.
+ */
+class CircleExecutionPlanAnnotation final : public loco::NodeAnnotation
+{
+public:
+  CircleExecutionPlanAnnotation() = delete;
+
+  explicit CircleExecutionPlanAnnotation(luci::CircleNodeExecutionPlan execution_plan)
+    : _execution_plan{std::move(execution_plan)}
+  {
+    // Do nothing
+  }
+
+public:
+  const luci::CircleNodeExecutionPlan &execution_plan(void) const { return _execution_plan; }
+  // No setter
+
+private:
+  luci::CircleNodeExecutionPlan _execution_plan;
+};
+
+} // namespace
+
+namespace luci
+{
+
+bool has_execution_plan(const luci::CircleNode *circle_node)
+{
+  return circle_node->annot<CircleExecutionPlanAnnotation>() != nullptr;
+}
+
+void add_execution_plan(luci::CircleNode *circle_node,
+                        const luci::CircleNodeExecutionPlan &execution_plan)
+{
+  circle_node->annot<CircleExecutionPlanAnnotation>(nullptr);
+  circle_node->annot(std::make_unique<CircleExecutionPlanAnnotation>(execution_plan));
+}
+
+luci::CircleNodeExecutionPlan get_execution_plan(const luci::CircleNode *circle_node)
+{
+  if (!has_execution_plan(circle_node))
+    throw std::runtime_error("Cannot find CircleNodeExecutionPlanAnnotation");
+
+  return circle_node->annot<CircleExecutionPlanAnnotation>()->execution_plan();
+}
+
+} // namespace luci
index fdfcaf1..ae604ab 100644 (file)
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_profile SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_profile ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_profile PRIVATE src)
 target_include_directories(luci_profile PUBLIC include)
 target_link_libraries(luci_profile PUBLIC loco)
index 687bf57..3ccc581 100644 (file)
@@ -5,6 +5,7 @@ require("locop")
 require("logo")
 require("logo-core")
 require("mio-circle")
+require("mio-tflite")
 require("oops")
 require("hermes")
 require("hermes-std")
index 781e6d6..f48210b 100644 (file)
@@ -2,7 +2,11 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 file(GLOB_RECURSE TESTS "src/*.test.cpp")
 list(REMOVE_ITEM SOURCES ${TESTS})
 
-add_library(luci_service SHARED ${SOURCES})
+if (NOT LIBRARY_TYPE)
+  set(LIBRARY_TYPE "SHARED")
+endif(NOT LIBRARY_TYPE)
+
+add_library(luci_service ${LIBRARY_TYPE} ${SOURCES})
 target_include_directories(luci_service PRIVATE src)
 target_include_directories(luci_service PUBLIC include)
 target_link_libraries(luci_service PUBLIC luci_lang)
index fade2cb..5f6d46f 100644 (file)
@@ -314,8 +314,7 @@ struct TypeInferenceAlgorithm final : public luci::CircleNodeVisitor<loco::DataT
     return input_type;
   }
 
-  // TODO support S16
-  loco::DataType visit(const luci::CircleQuantize *) final { return loco::DataType::U8; }
+  loco::DataType visit(const luci::CircleQuantize *node) final { return luci::dtype_get(node); }
 
   loco::DataType visit(const luci::CircleRange *node) final
   {
index 9c1126d..fa05ef0 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   return()
index 9ef2859..4660e40 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
 
 if(NOT FlatBuffers_FOUND)
   message(STATUS "Build mio-tflite: FAILED (missing Flatbuffers)")
@@ -36,3 +36,13 @@ target_link_libraries(mio_tflite_example mio_tflite)
 # TODO provide full tflite validation with runtime/interpreter
 add_executable(mio_tflite_validate example.cpp)
 target_link_libraries(mio_tflite_validate mio_tflite)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.3.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+  return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite_inc INTERFACE)
+target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
diff --git a/compiler/mio-tflite260/CMakeLists.txt b/compiler/mio-tflite260/CMakeLists.txt
new file mode 100644 (file)
index 0000000..39f4d9a
--- /dev/null
@@ -0,0 +1,49 @@
+nnas_find_package(FlatBuffers EXACT 1.12 QUIET)
+
+if(NOT FlatBuffers_FOUND)
+  message(STATUS "Build mio-tflite260: FAILED (missing Flatbuffers 1.12)")
+  return()
+endif(NOT FlatBuffers_FOUND)
+
+nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+
+if(NOT TensorFlowSource_FOUND)
+  message(STATUS "Build mio-tflite260: FAILED (missing TensorFlowSource 2.6.0)")
+  return()
+endif(NOT TensorFlowSource_FOUND)
+
+message(STATUS "Build mio-tflite260: TRUE")
+
+set(SCHEMA_FILE "${TensorFlowSource_DIR}/tensorflow/lite/schema/schema.fbs")
+
+# NOTE Use copy of schema.fbs as to provide unified way for circle also
+add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs"
+  COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs
+  WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+  DEPENDS "${SCHEMA_FILE}"
+)
+
+FlatBuffers_Target(mio_tflite260
+  OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/tflite"
+  INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen"
+  SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}"
+  SCHEMA_FILES "schema.fbs"
+)
+
+add_executable(mio_tflite260_example example.cpp)
+target_link_libraries(mio_tflite260_example mio_tflite260)
+
+# Temporary tflite validation tool to replace nnkit-tflite
+# TODO provide full tflite validation with runtime/interpreter
+add_executable(mio_tflite260_validate example.cpp)
+target_link_libraries(mio_tflite260_validate mio_tflite260)
+
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+
+if(NOT TensorFlowGEMMLowpSource_FOUND)
+  return()
+endif(NOT TensorFlowGEMMLowpSource_FOUND)
+
+add_library(mio_tflite260_inc INTERFACE)
+target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowSource_DIR}")
+target_include_directories(mio_tflite260_inc SYSTEM INTERFACE "${TensorFlowGEMMLowpSource_DIR}")
diff --git a/compiler/mio-tflite260/README.md b/compiler/mio-tflite260/README.md
new file mode 100644 (file)
index 0000000..970569b
--- /dev/null
@@ -0,0 +1,3 @@
+# mio-tflite260
+
+_mio-tflite260_ provides a library to access TensorFlow Lite model files that use the V2.6.0 schema.
diff --git a/compiler/mio-tflite260/example.cpp b/compiler/mio-tflite260/example.cpp
new file mode 100644 (file)
index 0000000..2787a3c
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// This example shows how to include and use "mio-tflite260"
+//
+#include <mio/tflite/schema_generated.h>
+
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char **argv)
+{
+  std::ifstream ifs(argv[1], std::ios_base::binary);
+  std::vector<char> buf(std::istreambuf_iterator<char>{ifs}, std::istreambuf_iterator<char>{});
+
+  flatbuffers::Verifier verifier{reinterpret_cast<uint8_t *>(buf.data()), buf.size()};
+
+  if (!tflite::VerifyModelBuffer(verifier))
+  {
+    std::cout << "Fail" << std::endl;
+    return 255;
+  }
+
+  std::cout << "Pass" << std::endl;
+  return 0;
+}
index 952857c..42eb4f8 100644 (file)
@@ -1,4 +1,4 @@
-nnas_find_package(FlatBuffers REQUIRED)
+nnas_find_package(FlatBuffers EXACT 1.10 REQUIRED)
 
 if (NOT FlatBuffers_FOUND)
     return()
index fc89f4d..729bfa8 100644 (file)
@@ -41,7 +41,6 @@ set(ONE_UTILITY_FILES
     one-build.template.cfg
     onecc.template.cfg
     utils.py
-    conv_mixin_1.8.0.patch
 )
 
 foreach(ONE_UTILITY IN ITEMS ${ONE_UTILITY_FILES})
diff --git a/compiler/one-cmds/conv_mixin_1.8.0.patch b/compiler/one-cmds/conv_mixin_1.8.0.patch
deleted file mode 100644 (file)
index 96a0f41..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
---- a/onnx_tf/handlers/backend/conv_mixin.py
-+++ b/onnx_tf/handlers/backend/conv_mixin.py
-@@ -98,7 +98,7 @@
-     depthwise = (x_rank == 4 and len(weight_shape) == 4 and group != 1 and
-                  not transpose and not (None in weight_shape))
-     if depthwise and isinstance(x_shape, np.ndarray):
--      depthwise = group == x_shape[1]
-+      depthwise = bool(group == x_shape[1])
-     if depthwise is True:
-       # Depthwise convolution.
index f867094..0a0c4b1 100644 (file)
@@ -150,11 +150,14 @@ one-optimize provides network or operator transformation shown below.
 
 Current transformation options are
 - disable_validation : This will turn off operator validations.
+- expand_broadcast_const : This will expand broadcastable constant node inputs
 - fold_add_v2 : This removes AddV2 operation which can be folded
 - fold_cast : This removes Cast operation which can be folded
 - fold_dequantize : This removes Dequantize operation which can be folded
+- fold_dwconv : This folds Depthwise Convolution operation which can be folded
 - fold_sparse_to_dense : This removes SparseToDense operation which can be folded
 - forward_reshape_to_unaryop: This will move Reshape after UnaryOp for centain condition
+- fuse_add_with_fully_connected: This fuses Add operator with the preceding FullyConnected operator if possible
 - fuse_add_with_tconv: This fuses Add operator with the preceding TConv operator if possible
 - fuse_batchnorm_with_conv : This fuses BatchNorm operator to convolution operator
 - fuse_batchnorm_with_dwconv : This fuses BatchNorm operator to depthwise convolution operator
@@ -192,6 +195,8 @@ Current transformation options are
 - shuffle_weight_to_16x1float32 : This will convert weight format of FullyConnected to SHUFFLED16x1FLOAT32.
   Note that it only converts weights whose row is a multiple of 16.
 - substitute_pack_to_reshape : This will convert single input Pack to Reshape.
+- substitute_padv2_to_pad : This will convert certain condition PadV2 to Pad.
+- substitute_splitv_to_split : This will convert certain condition SplitV to Split.
 - substitute_squeeze_to_reshape : This will convert certain condition Squeeze to Reshape.
 - substitute_strided_slice_to_reshape : This will convert certain condition StridedSlice to Reshape.
 - substitute_transpose_to_reshape : This will convert certain condition Transpose to Reshape.
index a496a54..726538d 100644 (file)
@@ -28,6 +28,7 @@ import os
 import subprocess
 import sys
 import tempfile
+import shutil
 
 import utils as _utils
 
@@ -49,6 +50,7 @@ def _get_backends_list():
     The list where `one-codegen` finds its backends
     - `bin` folder where `one-codegen` exists
     - `backends` folder
+    - System path
 
     NOTE If there are backends of the same name in different places,
      the closer to the top in the list, the higher the priority.
@@ -151,6 +153,10 @@ def main():
         if ntpath.basename(cand) == backend_base:
             codegen_path = cand
     if not codegen_path:
+        # Find backend from system path
+        codegen_path = shutil.which(backend_base)
+
+    if not codegen_path:
         raise FileNotFoundError(backend_base + ' not found')
     codegen_cmd = [codegen_path] + backend_args + unknown_args
     if _utils._is_valid_attr(args, 'command'):
index fbc3a75..2851917 100644 (file)
@@ -34,8 +34,8 @@ fi
 # - https://github.com/onnx/onnx-tensorflow/blob/master/Versioning.md
 
 VER_TENSORFLOW=2.3.0
-VER_ONNX=1.8.0
-VER_ONNX_TF=1.8.0
+VER_ONNX=1.10.1
+VER_ONNX_TF=1.9.0
 
 # Install tensorflow
 
@@ -61,7 +61,7 @@ ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install tensorflow-cpu==${VER_TENSORFLOW}
 ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install Pillow==6.2.2
 
 # Install PyTorch and ONNX related
-${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+${VENV_PYTHON} -m pip ${PIP_OPTIONS} install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
 
 # Provide install of custom onnx-tf
 if [ -n "${EXT_ONNX_TF_WHL}" ]; then
@@ -69,23 +69,3 @@ if [ -n "${EXT_ONNX_TF_WHL}" ]; then
 else
   ${VENV_PYTHON} -m pip ${PIP_OPTIONS} install onnx==${VER_ONNX} onnx-tf==${VER_ONNX_TF}
 fi
-
-# TODO remove this patch after onnx-tf next release
-# apply patch for DWConv conversion bug: https://github.com/onnx/onnx-tensorflow/pull/905
-if [[ -z "${EXT_ONNX_TF_WHL}" ]]; then
-  PY_SITE_PACKAGES=$(${VENV_PYTHON} -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
-  if [[ -d ${PY_SITE_PACKAGES} ]]; then
-    pushd ${PY_SITE_PACKAGES} > /dev/null
-    PATCH_TARGET_FILE=onnx_tf/handlers/backend/conv_mixin.py
-    if [[ -f "${PATCH_TARGET_FILE}" ]]; then
-      # if patch is already applied, error code is 1
-      # catch error code and check if this is the case
-      set +e
-      patch -t -N -p1 < ${DRIVER_PATH}/conv_mixin_1.8.0.patch
-      ret_code=$?
-      [[ $ret_code -gt 1 ]] && exit $ret_code
-      set -e
-    fi
-    popd > /dev/null
-  fi
-fi
index 798cc75..ed6d8bd 100644 (file)
@@ -157,14 +157,7 @@ def main():
         profile_cmd += getattr(args, 'command').split()
 
     # run backend driver
-    with subprocess.Popen(
-            profile_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-            bufsize=1) as p:
-        for line in p.stdout:
-            sys.stdout.buffer.write(line)
-            sys.stdout.buffer.flush()
-    if p.returncode != 0:
-        sys.exit(p.returncode)
+    _utils._run(profile_cmd, err_prefix=backend_base)
 
 
 if __name__ == '__main__':
index 25ef17a..cd623a6 100644 (file)
@@ -88,6 +88,17 @@ def _get_parser():
         type=str,
         help='record mode (supported: percentile/moving_average, default=percentile)')
 
+    # arguments for force_quantparam
+    parser.add_argument(
+        '--force_quantparam',
+        action='store_true',
+        help='write quantparam to the specified tensor')
+    parser.add_argument(
+        '--tensor_name', type=str, action='append', help='tensor name (string)')
+    parser.add_argument('--scale', type=float, action='append', help='scale (float)')
+    parser.add_argument(
+        '--zero_point', type=int, action='append', help='zero point (int)')
+
     return parser
 
 
@@ -114,8 +125,22 @@ def _verify_arg(parser, args):
         missing.append('-i/--input_path')
     if not _utils._is_valid_attr(args, 'output_path'):
         missing.append('-o/--output_path')
+    if _utils._is_valid_attr(args, 'force_quantparam'):
+        if not _utils._is_valid_attr(args, 'tensor_name'):
+            missing.append('--tensor_name')
+        if not _utils._is_valid_attr(args, 'scale'):
+            missing.append('--scale')
+        if not _utils._is_valid_attr(args, 'zero_point'):
+            missing.append('--zero_point')
     if len(missing):
         parser.error('the following arguments are required: ' + ' '.join(missing))
+    if _utils._is_valid_attr(args, 'force_quantparam'):
+        tensors = getattr(args, 'tensor_name')
+        scales = getattr(args, 'scale')
+        zerops = getattr(args, 'zero_point')
+        if len(tensors) != len(scales) or len(tensors) != len(zerops):
+            parser.error(
+                'The same number of tensor_name, scale, and zero_point should be given.')
 
 
 def _parse_arg(parser):
@@ -128,6 +153,11 @@ def _parse_arg(parser):
 
 
 def _quantize(args):
+    if _utils._is_valid_attr(args, 'force_quantparam'):
+        # write quantization parameters
+        _write_qparam(args)
+        return
+
     # get file path to log
     dir_path = os.path.dirname(os.path.realpath(__file__))
     logfile_path = os.path.realpath(args.output_path) + '.log'
@@ -233,6 +263,43 @@ def _quantize(args):
         _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
 
 
+def _write_qparam(args):
+    # get file path to log
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+    logfile_path = os.path.realpath(args.output_path) + '.log'
+
+    with open(logfile_path, 'wb') as f:
+        # get driver path
+        circle_quantizer_path = os.path.join(dir_path, 'circle-quantizer')
+
+        # make a command to write qparams to the tensors
+        circle_quantizer_cmd = [circle_quantizer_path]
+        # verbose
+        if _utils._is_valid_attr(args, 'verbose'):
+            circle_quantizer_cmd.append('--verbose')
+        if _utils._is_valid_attr(args, 'tensor_name'):
+            tensor_name = getattr(args, 'tensor_name')
+        if _utils._is_valid_attr(args, 'scale'):
+            scale = getattr(args, 'scale')
+        if _utils._is_valid_attr(args, 'zero_point'):
+            zero_point = getattr(args, 'zero_point')
+        for (t, s, zp) in zip(tensor_name, scale, zero_point):
+            circle_quantizer_cmd.append('--force_quantparam')
+            circle_quantizer_cmd.append(t)
+            circle_quantizer_cmd.append(str(s))
+            circle_quantizer_cmd.append(str(zp))
+        # input and output path
+        if _utils._is_valid_attr(args, 'input_path'):
+            circle_quantizer_cmd.append(getattr(args, 'input_path'))
+        if _utils._is_valid_attr(args, 'output_path'):
+            circle_quantizer_cmd.append(getattr(args, 'output_path'))
+
+        f.write((' '.join(circle_quantizer_cmd) + '\n').encode())
+
+        # run circle-quantizer
+        _utils._run(circle_quantizer_cmd, err_prefix="circle_quantizer", logfile=f)
+
+
 def main():
     # parse arguments
     parser = _get_parser()
index 738c2cb..9cf0b14 100644 (file)
@@ -21,10 +21,16 @@ filename="${filename_ext%.*}"
 
 trap_err_onexit()
 {
+  # TF2.3.0
  if grep -q "is incompatible with result type" "${filename}.log"; then
     echo "${filename_ext} SUCCESS"
     exit 0
   fi
+  # TF2.6.0
+  if grep -q "is incompatible with body result type" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
 
   echo "${filename_ext} FAILED"
   exit 255
index 7c63ee3..3fb5c7d 100644 (file)
@@ -45,5 +45,8 @@ one-import tf \
 --input_arrays input --input_shapes "0,299,299,3" \
--output_arrays InceptionV3/Predictions/Reshape_1 > ${filename}.log 2>&1
 
-echo "${filename_ext} FAILED"
-exit 255
+# NOTE TF2.3.0 fails(which is expected) but doesn't for TF2.5(4?) and above
+# https://github.com/tensorflow/tensorflow/issues/51756 for details
+# TODO exit 255
+echo "${filename_ext} SKIPPED"
+exit 0
diff --git a/compiler/one-cmds/tests/one-quantize_005.test b/compiler/one-cmds/tests/one-quantize_005.test
new file mode 100644 (file)
index 0000000..8449df6
--- /dev/null
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_005.q8.circle"
+
+rm -rf ${outputfile}
+
+# run test with force_quantparam option
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--zero_point 33 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_006.test b/compiler/one-cmds/tests/one-quantize_006.test
new file mode 100644 (file)
index 0000000..92b9ebe
--- /dev/null
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.one-quantize_006.q8.circle"
+
+rm -rf ${outputfile}
+
+# run test with force_quantparam option (multi tensors)
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--zero_point 33 \
+--tensor_name InceptionV3/Predictions/Reshape_1 \
+--scale 2.3 \
+--zero_point 33 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
diff --git a/compiler/one-cmds/tests/one-quantize_neg_018.test b/compiler/one-cmds/tests/one-quantize_neg_018.test
new file mode 100644 (file)
index 0000000..6937caf
--- /dev/null
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# negative usage with missing --zero_point argument
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  if grep -q "following arguments are required: --zero_point" "${filename}.log"; then
+    echo "${filename_ext} SUCCESS"
+    exit 0
+  fi
+
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+inputfile="./inception_v3.mat.q8.circle"
+outputfile="./inception_v3.neg_018.q8.circle"
+
+rm -rf ${outputfile}.log
+
+# run test
+one-quantize \
+--force_quantparam \
+--tensor_name input \
+--scale 2.3 \
+--input_path ${inputfile} \
+--output_path ${outputfile} > ${filename}.log 2>&1
+
+echo "${filename_ext} FAILED"
+exit 255
diff --git a/compiler/one-cmds/tests/onecc_022.cfg b/compiler/one-cmds/tests/onecc_022.cfg
new file mode 100644 (file)
index 0000000..9741d51
--- /dev/null
@@ -0,0 +1,18 @@
+[onecc]
+one-import-tf=False
+one-import-tflite=False
+one-import-bcq=False
+one-import-onnx=False
+one-optimize=False
+one-quantize=True
+one-pack=False
+one-codegen=False
+one-profile=False
+
+[one-quantize]
+input_path=inception_v3.mat.q8.circle
+output_path=inception_v3.onecc_022.q8.circle
+force_quantparam=True
+tensor_name=input
+scale=2.1
+zero_point=45
diff --git a/compiler/one-cmds/tests/onecc_022.test b/compiler/one-cmds/tests/onecc_022.test
new file mode 100644 (file)
index 0000000..3aaa26f
--- /dev/null
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# one-quantize
+
+filename_ext="$(basename -- $0)"
+filename="${filename_ext%.*}"
+
+trap_err_onexit()
+{
+  echo "${filename_ext} FAILED"
+  exit 255
+}
+
+trap trap_err_onexit ERR
+
+configfile="onecc_022.cfg"
+outputfile="inception_v3.onecc_022.q8.circle"
+
+rm -rf ${outputfile}
+
+# run test
+onecc -C ${configfile} > /dev/null 2>&1
+
+if [[ ! -s "${outputfile}" ]]; then
+  trap_err_onexit
+fi
+
+echo "${filename_ext} SUCCESS"
index 694651d..7f26953 100644 (file)
@@ -103,4 +103,14 @@ if [[ ! -s ${outputfile} ]]; then
   --output_arrays InceptionV3/Predictions/Reshape_1
 fi
 
+# prepare 'inception_v3.mat.q8.circle' file used for quantization test
+inputfile="./inception_v3.circle"
+outputfile="./inception_v3.mat.q8.circle"
+
+if [[ ! -s ${outputfile} ]]; then
+  ../bin/one-quantize \
+  --input_path ${inputfile} \
+  --output_path ${outputfile}
+fi
+
 popd > /dev/null
index f18dc6f..efb01a2 100644 (file)
@@ -29,6 +29,7 @@ class _CONSTANT:
         ('convert_nchw_to_nhwc',
          'Experimental: This will convert NCHW operators to NHWC under the assumption that input model is NCHW.'
          ),
+        ('expand_broadcast_const', 'expand broadcastable constant node inputs'),
         ('nchw_to_nhwc_input_shape',
          'convert the input shape of the model (argument for convert_nchw_to_nhwc)'),
         ('nchw_to_nhwc_output_shape',
@@ -36,9 +37,11 @@ class _CONSTANT:
         ('fold_add_v2', 'fold AddV2 op with constant inputs'),
         ('fold_cast', 'fold Cast op with constant input'),
         ('fold_dequantize', 'fold Dequantize op'),
+        ('fold_dwconv', 'fold Depthwise Convolution op with constant inputs'),
         ('fold_sparse_to_dense', 'fold SparseToDense op'),
         ('forward_reshape_to_unaryop', 'Forward Reshape op'),
         ('fuse_add_with_tconv', 'fuse Add op to Transposed'),
+        ('fuse_add_with_fully_connected', 'fuse Add op to FullyConnected op'),
         ('fuse_batchnorm_with_conv', 'fuse BatchNorm op to Convolution op'),
         ('fuse_batchnorm_with_dwconv', 'fuse BatchNorm op to Depthwise Convolution op'),
         ('fuse_batchnorm_with_tconv', 'fuse BatchNorm op to Transposed Convolution op'),
@@ -74,6 +77,8 @@ class _CONSTANT:
          'convert weight format of FullyConnected op to SHUFFLED16x1FLOAT32.'
          ' Note that it only converts weights whose row is a multiple of 16'),
         ('substitute_pack_to_reshape', 'convert single input Pack op to Reshape op'),
+        ('substitute_padv2_to_pad', 'convert certain condition PadV2 to Pad'),
+        ('substitute_splitv_to_split', 'convert certain condition SplitV to Split'),
         ('substitute_squeeze_to_reshape', 'convert certain condition Squeeze to Reshape'),
         ('substitute_strided_slice_to_reshape',
          'convert certain condition StridedSlice to Reshape'),
@@ -107,6 +112,14 @@ def _add_default_arg(parser):
     parser.add_argument('-S', '--section', type=str, help=argparse.SUPPRESS)
 
 
+def is_accumulated_arg(arg, driver):
+    if driver == "one-quantize":
+        if arg == "tensor_name" or arg == "scale" or arg == "zero_point":
+            return True
+
+    return False
+
+
 def _is_valid_attr(args, attr):
     return hasattr(args, attr) and getattr(args, attr)
 
@@ -124,6 +137,12 @@ def _parse_cfg(args, driver_name):
                 raise AssertionError('configuration file must have \'' + driver_name +
                                      '\' section')
             for key in config[args.section]:
+                if is_accumulated_arg(key, driver_name):
+                    if not _is_valid_attr(args, key):
+                        setattr(args, key, [config[args.section][key]])
+                    else:
+                        getattr(args, key).append(config[args.section][key])
+                    continue
                 if not _is_valid_attr(args, key):
                     setattr(args, key, config[args.section][key])
         # if section is not given, section name is same with its driver name
@@ -133,6 +152,12 @@ def _parse_cfg(args, driver_name):
                                      '\' section')
             secton_to_run = driver_name
             for key in config[secton_to_run]:
+                if is_accumulated_arg(key, driver_name):
+                    if not _is_valid_attr(args, key):
+                        setattr(args, key, [config[secton_to_run][key]])
+                    else:
+                        getattr(args, key).append(config[secton_to_run][key])
+                    continue
                 if not _is_valid_attr(args, key):
                     setattr(args, key, config[secton_to_run][key])
 
@@ -242,33 +267,26 @@ def _run(cmd, err_prefix=None, logfile=None):
         err_prefix: prefix to be put before every stderr lines
         logfile: file stream to which both of stdout and stderr lines will be written
     """
-    if logfile == None:
-        with subprocess.Popen(cmd, stderr=subprocess.PIPE, bufsize=1) as p:
-            for line in p.stderr:
-                if err_prefix:
-                    line = f"{err_prefix}: ".encode() + line
-                sys.stderr.buffer.write(line)
-                sys.stderr.buffer.flush()
-    else:
-        with subprocess.Popen(
-                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p:
-            import select
-            inputs = set([p.stdout, p.stderr])
-            while inputs:
-                readable, _, _ = select.select(inputs, [], [])
-                for x in readable:
-                    line = x.readline()
-                    if len(line) == 0:
-                        inputs.discard(x)
-                        continue
-                    if x == p.stdout:
-                        out = sys.stdout
-                    if x == p.stderr:
-                        out = sys.stderr
-                        if err_prefix:
-                            line = f"{err_prefix}: ".encode() + line
-                    out.buffer.write(line)
-                    out.buffer.flush()
+    with subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1) as p:
+        import select
+        inputs = set([p.stdout, p.stderr])
+        while inputs:
+            readable, _, _ = select.select(inputs, [], [])
+            for x in readable:
+                line = x.readline()
+                if len(line) == 0:
+                    inputs.discard(x)
+                    continue
+                if x == p.stdout:
+                    out = sys.stdout
+                if x == p.stderr:
+                    out = sys.stderr
+                    if err_prefix:
+                        line = f"{err_prefix}: ".encode() + line
+                out.buffer.write(line)
+                out.buffer.flush()
+                if logfile != None:
                     logfile.write(line)
     if p.returncode != 0:
         sys.exit(p.returncode)
index 80661e5..00ffb57 100644 (file)
@@ -1,7 +1,7 @@
 unset(QUANTIZATION_VALUE_TEST)
 unset(QUANTIZATION_VALUE_TEST_WITH_PARAM)
 
-nnas_find_package(FlatBuffers QUIET)
+nnas_find_package(FlatBuffers EXACT 1.10 QUIET)
 if(NOT FlatBuffers_FOUND)
   message(STATUS "Build pota-quantization-value-test: FAILED (missing FlatBuffers)")
   return()
@@ -25,7 +25,7 @@ get_target_property(SCHEMA_BIN_PATH mio_circle BINARY_DIR)
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/gen_h5_explicit_inputs.py"
                "${CMAKE_CURRENT_BINARY_DIR}/gen_h5_explicit_inputs.py" COPYONLY)
 
-set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_3_0")
+set(VIRTUALENV "${NNCC_OVERLAY_DIR}/venv_2_6_0")
 
 ###
 ### Generate test.config
index 883a925..4eb7204 100644 (file)
@@ -2,3 +2,4 @@ require("record-minmax")
 require("circle-quantizer")
 require("circle-tensordump")
 require("common-artifacts")
+require("mio-circle")
index ba01986..6ba55c3 100644 (file)
@@ -10,5 +10,5 @@ add_executable(tfl-inspect ${DRIVER} ${SOURCES})
 target_include_directories(tfl-inspect PRIVATE src)
 target_link_libraries(tfl-inspect arser)
 target_link_libraries(tfl-inspect foder)
-target_link_libraries(tfl-inspect mio_tflite)
+target_link_libraries(tfl-inspect mio_tflite260)
 target_link_libraries(tfl-inspect safemain)
index 25857ad..9a7477b 100644 (file)
@@ -1,4 +1,4 @@
 require("arser")
 require("foder")
-require("mio-tflite")
+require("mio-tflite260")
 require("safemain")
index 5be2894..41a8396 100644 (file)
 
 #include "Reader.h"
 
+#include <cassert>
 #include <sstream>
 #include <string>
 
 namespace tflinspect
 {
 
+// This will provide v3/v3a format neutral BuiltinOperator
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  int8_t dp_code = opcode->deprecated_builtin_code();
+  // 127 is max of int8_t which is upper bound of v3 builtin_code
+  // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
+  if (dp_code < 127 && dp_code >= 0)
+    return tflite::BuiltinOperator(dp_code);
+  return opcode->builtin_code();
+}
+
 bool is_valid(const tflite::OperatorCode *opcode)
 {
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
 }
 
 bool is_custom(const tflite::OperatorCode *opcode)
 {
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return (code == tflite::BuiltinOperator_CUSTOM);
 }
 
@@ -56,7 +69,7 @@ std::string opcode_name(const tflite::OperatorCode *opcode)
     return custom_op;
   }
 
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return tflite::EnumNameBuiltinOperator(code);
 }
 
@@ -122,7 +135,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  return opcode->builtin_code();
+  return tflinspect::builtin_code_neutral(opcode);
 }
 
 std::string Reader::opcode_name(const tflite::Operator *op) const
index e9e182a..91b7bb9 100644 (file)
@@ -36,6 +36,7 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
 bool is_valid(const tflite::OperatorCode *opcode);
 bool is_custom(const tflite::OperatorCode *opcode);
 std::string opcode_name(const tflite::OperatorCode *opcode);
index 4421a46..a87d30c 100644 (file)
@@ -8,6 +8,6 @@ add_executable(tfl-verify ${SOURCES})
 target_include_directories(tfl-verify PRIVATE src)
 target_link_libraries(tfl-verify arser)
 target_link_libraries(tfl-verify foder)
-target_link_libraries(tfl-verify mio_tflite)
+target_link_libraries(tfl-verify mio_tflite260)
 target_link_libraries(tfl-verify safemain)
 target_link_libraries(tfl-verify cwrap)
index 79503f3..72803d8 100644 (file)
@@ -1,5 +1,5 @@
 require("arser")
 require("foder")
-require("mio-tflite")
+require("mio-tflite260")
 require("safemain")
 require("cwrap")
index ebc8733..ac7fe4b 100644 (file)
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
   return()
 endif(NOT Protobuf_FOUND)
 
-if(NOT TARGET mio_tflite)
-  message(STATUS "Build tflchef: FAILED (missing mio_tflite)")
+if(NOT TARGET mio_tflite260)
+  message(STATUS "Build tflchef: FAILED (missing mio_tflite260)")
   return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite260)
 
 # Recipe Parser
 add_subdirectory(proto)
index 43f6b8b..413b78b 100644 (file)
@@ -5,5 +5,5 @@ target_include_directories(tflchef_core PUBLIC include)
 target_include_directories(tflchef_core PRIVATE src)
 target_link_libraries(tflchef_core tflchef_proto)
 target_link_libraries(tflchef_core tflchef_log)
-target_link_libraries(tflchef_core mio_tflite)
+target_link_libraries(tflchef_core mio_tflite260)
 target_link_libraries(tflchef_core souschef)
index dffd336..557c20b 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "AddV2.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> AddV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index b3ae821..bbef5ec 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "All.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> AllChef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index 595f3b9..6d2c5b1 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "BatchMatMulV2.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> BatchMatMulV2Chef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index fc429e2..dd458b3 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "BroadcastTo.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> BroadcastToChef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index ba34aa8..e7c707d 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "MatMul.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> MatMulChef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index d12597e..b250032 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "MatrixBandPart.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> MatrixBandPartChef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index 9dacf7b..290d3c2 100644 (file)
@@ -17,7 +17,7 @@
 
 #include "MaxPoolWithArgmax.h"
 
-#include "flatbuffers/flexbuffers.h"
+#include <flatbuffers/flexbuffers.h>
 
 flatbuffers::Offset<void> MaxPoolWithArgmaxChef::value(flatbuffers::FlatBufferBuilder &fbb) const
 {
index aba20dc..7028bd9 100644 (file)
@@ -582,8 +582,11 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
   for (auto const &opcode : builtin_code_map)
   {
     tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
-    code_builder.add_builtin_code(opcode.first);
+    // TODO support for opcode.first >= 127
+    assert(opcode.first < 127);
+    code_builder.add_deprecated_builtin_code(opcode.first);
     code_builder.add_version(opcode.second);
+    code_builder.add_builtin_code(opcode.first);
     auto code = code_builder.Finish();
     // Update OperatorCode vector
     code_vec.emplace_back(code);
@@ -597,8 +600,9 @@ GeneratedModel cook(const ::tflchef::ModelRecipe &model_recipe)
   {
     auto custom_code = flatbuffer_builder->CreateString(opcode);
     tflite::OperatorCodeBuilder code_builder{*flatbuffer_builder};
-    code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM);
+    code_builder.add_deprecated_builtin_code(tflite::BuiltinOperator_CUSTOM);
     code_builder.add_custom_code(custom_code);
+    code_builder.add_builtin_code(tflite::BuiltinOperator_CUSTOM);
     auto code = code_builder.Finish();
     // Update OperatorCode vector
     code_vec.emplace_back(code);
index 4c02174..78bfa2d 100644 (file)
@@ -1,7 +1,7 @@
 require("arser")
 require("nnkit")
 require("cwrap")
-require("mio-tflite")
+require("mio-tflite260")
 require("safemain")
 require("hermes")
 require("hermes-std")
index ce8b8c4..3c4c3ff 100644 (file)
@@ -4,6 +4,6 @@ add_library(tflchef_tflite STATIC ${SOURCES})
 target_include_directories(tflchef_tflite PUBLIC include)
 target_include_directories(tflchef_tflite PRIVATE src)
 target_link_libraries(tflchef_tflite tflchef_proto)
-target_link_libraries(tflchef_tflite mio_tflite)
+target_link_libraries(tflchef_tflite mio_tflite260)
 target_link_libraries(tflchef_tflite cwrap)
 target_link_libraries(tflchef_tflite souschef)
index 51d9b5f..1462ee7 100644 (file)
@@ -38,15 +38,27 @@ const char *tensor_name(const tflite::Tensor *tensor)
   return kEmptyTensorName;
 }
 
+// This will provide v3/v3a format neutral BuiltinOperator
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  int8_t dp_code = opcode->deprecated_builtin_code();
+  // 127 is max of int8_t which is upper bound of v3 builtin_code
+  // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
+  if (dp_code < 127 && dp_code >= 0)
+    return tflite::BuiltinOperator(dp_code);
+  return opcode->builtin_code();
+}
+
 bool is_valid(const tflite::OperatorCode *opcode)
 {
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
 }
 
 bool is_custom(const tflite::OperatorCode *opcode)
 {
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return (code == tflite::BuiltinOperator_CUSTOM);
 }
 
@@ -92,7 +104,7 @@ tflite::BuiltinOperator TFliteImport::builtin_code(const tflite::Operator *op) c
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  return opcode->builtin_code();
+  return builtin_code_neutral(opcode);
 }
 
 std::string TFliteImport::opcode_name(const tflite::Operator *op) const
@@ -116,7 +128,7 @@ std::string TFliteImport::opcode_name(const tflite::Operator *op) const
     return opcode->custom_code()->c_str();
   }
 
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return EnumNameBuiltinOperator(code);
 }
 
index 9d0a642..43b5bba 100644 (file)
@@ -36,6 +36,7 @@ using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operat
 
 const char *tensor_type(const tflite::Tensor *tensor);
 const char *tensor_name(const tflite::Tensor *tensor);
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
 bool is_valid(const tflite::OperatorCode *opcode);
 bool is_custom(const tflite::OperatorCode *opcode);
 
index e6afcb6..83f7feb 100644 (file)
@@ -1,7 +1,7 @@
-if(NOT TARGET mio_tflite)
-  message(STATUS "Build tfldump: FAILED (missing mio_tflite)")
+if(NOT TARGET mio_tflite260)
+  message(STATUS "Build tfldump: FAILED (missing mio_tflite260)")
   return()
-endif(NOT TARGET mio_tflite)
+endif(NOT TARGET mio_tflite260)
 
 set(DRIVER "driver/Driver.cpp")
 
@@ -10,6 +10,6 @@ file(GLOB_RECURSE SOURCES "src/*.cpp")
 add_executable(tfldump ${DRIVER} ${SOURCES})
 target_include_directories(tfldump PRIVATE include)
 target_link_libraries(tfldump arser)
-target_link_libraries(tfldump mio_tflite)
+target_link_libraries(tfldump mio_tflite260)
 target_link_libraries(tfldump safemain)
-target_link_libraries(tfldump flatbuffers)
+target_link_libraries(tfldump flatbuffers-1.12)
index 2cdd3a3..d0f9ccc 100644 (file)
@@ -1,3 +1,3 @@
 require("arser")
-require("mio-tflite")
+require("mio-tflite260")
 require("safemain")
index 20e1343..7a480bc 100644 (file)
@@ -350,6 +350,7 @@ void dump_model(std::ostream &os, const tflite::Model *model)
   auto opcodes = reader.opcodes();
   auto buffers = reader.buffers();
   auto metadata = reader.metadata();
+  auto signaturedefs = reader.signaturedefs();
 
   // dump operator_codes
   os << "Operator Codes: [order] OpCodeName (OpCode Enum)" << std::endl;
@@ -357,11 +358,13 @@ void dump_model(std::ostream &os, const tflite::Model *model)
   for (auto opcode : opcodes)
   {
     tflite::BuiltinOperator op_code = opcode->builtin_code();
+    tflite::BuiltinOperator dp_code = tflite::BuiltinOperator(opcode->deprecated_builtin_code());
+
     auto op_name = tflread::opcode_name(opcode);
     auto op_version = opcode->version();
 
     os << "[" << opcode_index << "] " << op_name << " (code: " << op_code
-       << ", version: " << op_version << ")" << std::endl;
+       << ", dep_code: " << dp_code << ", version: " << op_version << ")" << std::endl;
 
     opcode_index++;
   }
@@ -389,7 +392,38 @@ void dump_model(std::ostream &os, const tflite::Model *model)
     os << "metadata : B(index) name" << std::endl;
     for (uint32_t i = 0; i < metadata->Length(); ++i)
     {
-      os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str();
+      os << "B(" << metadata->Get(i)->buffer() << ") " << metadata->Get(i)->name()->c_str()
+         << std::endl;
+    }
+    os << std::endl;
+  }
+
+  // dump signaturedef
+  if (signaturedefs != nullptr)
+  {
+    os << "SignatureDef" << std::endl;
+    for (uint32_t i = 0; i < signaturedefs->Length(); ++i)
+    {
+      auto sign_i = signaturedefs->Get(i);
+      os << "S(" << i << ") " << sign_i->method_name()->c_str() << ", key("
+         << sign_i->key()->c_str() << "), sub_graph(" << sign_i->subgraph_index() << ")"
+         << std::endl;
+
+      auto inputs_i = sign_i->inputs();
+      for (uint32_t t = 0; t < inputs_i->Length(); ++t)
+      {
+        auto inputs_i_t = inputs_i->Get(t);
+        os << "    I T(" << t << ") " << inputs_i_t->name()->c_str() << ": "
+           << inputs_i_t->tensor_index() << std::endl;
+      }
+
+      auto outputs_i = sign_i->outputs();
+      for (uint32_t t = 0; t < outputs_i->Length(); ++t)
+      {
+        auto outputs_i_t = outputs_i->Get(t);
+        os << "    O T(" << t << ") " << outputs_i_t->name()->c_str() << ": "
+           << outputs_i_t->tensor_index() << std::endl;
+      }
     }
     os << std::endl;
   }
index 856cc56..8b3a96e 100644 (file)
 namespace tflread
 {
 
+// This will provide v3/v3a format neutral BuiltinOperator
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  int8_t dp_code = opcode->deprecated_builtin_code();
+  if (dp_code < 127 && dp_code >= 0)
+    return tflite::BuiltinOperator(dp_code);
+  return opcode->builtin_code();
+}
+
 bool is_valid(const tflite::OperatorCode *opcode)
 {
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return (tflite::BuiltinOperator_MIN <= code && code <= tflite::BuiltinOperator_MAX);
 }
 
 bool is_custom(const tflite::OperatorCode *opcode)
 {
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return (code == tflite::BuiltinOperator_CUSTOM);
 }
 
@@ -56,7 +66,7 @@ std::string opcode_name(const tflite::OperatorCode *opcode)
     return custom_op;
   }
 
-  tflite::BuiltinOperator code = opcode->builtin_code();
+  tflite::BuiltinOperator code = builtin_code_neutral(opcode);
   return tflite::EnumNameBuiltinOperator(code);
 }
 
@@ -82,6 +92,7 @@ Reader::Reader(const tflite::Model *model)
   _subgraphs = model->subgraphs();
   _buffers = model->buffers();
   _metadata = model->metadata();
+  _signaturedefs = model->signature_defs();
 
   auto opcodes = model->operator_codes();
   for (const ::tflite::OperatorCode *opcode : *opcodes)
@@ -118,7 +129,7 @@ tflite::BuiltinOperator Reader::builtin_code(const tflite::Operator *op) const
   assert(index < _op_codes.size());
   const tflite::OperatorCode *opcode = _op_codes.at(index);
 
-  return opcode->builtin_code();
+  return tflread::builtin_code_neutral(opcode);
 }
 
 std::string Reader::opcode_name(const tflite::Operator *op) const
index f835be1..80f317d 100644 (file)
@@ -36,6 +36,7 @@ template <typename T> std::vector<T> as_index_vector(const flatbuffers::Vector<T
   return ret;
 }
 
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode);
 bool is_valid(const tflite::OperatorCode *opcode);
 bool is_custom(const tflite::OperatorCode *opcode);
 std::string opcode_name(const tflite::OperatorCode *opcode);
@@ -53,6 +54,7 @@ private:
   using TFliteTensors_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Tensor>>;
   using TFliteOperators_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Operator>>;
   using TFliteMetadata_t = flatbuffers::Vector<flatbuffers::Offset<tflite::Metadata>>;
+  using TFliteSignatureDef_t = flatbuffers::Vector<flatbuffers::Offset<tflite::SignatureDef>>;
 
 public:
   Reader(const tflite::Model *model);
@@ -69,6 +71,7 @@ public:
   const std::vector<int32_t> &inputs() const { return _inputs; }
   const std::vector<int32_t> &outputs() const { return _outputs; }
   const TFliteMetadata_t *metadata() const { return _metadata; }
+  const TFliteSignatureDef_t *signaturedefs() const { return _signaturedefs; }
 
   uint32_t num_subgraph() const { return _subgraphs->Length(); }
 
@@ -89,6 +92,7 @@ private:
   const TFliteTensors_t *_tensors{nullptr};
   const TFliteOperators_t *_operators{nullptr};
   const TFliteMetadata_t *_metadata{nullptr};
+  const TFliteSignatureDef_t *_signaturedefs{nullptr};
 
   uint32_t _subgraph_index;
   std::string _subgraph_name;
index 3e46dd8..4ea01ad 100644 (file)
@@ -1,7 +1,7 @@
 nnas_include(TargetRequire)
 
 unset(REQUIRED_TARGETS)
-list(APPEND REQUIRED_TARGETS mio_tflite)
+list(APPEND REQUIRED_TARGETS mio_tflite260)
 list(APPEND REQUIRED_TARGETS mio_circle)
 TargetRequire_Return(${REQUIRED_TARGETS})
 
@@ -11,8 +11,9 @@ add_executable(tflite2circle ${DRIVER} ${SOURCES})
 target_include_directories(tflite2circle PRIVATE include)
 target_include_directories(tflite2circle PRIVATE src)
 target_link_libraries(tflite2circle arser)
+target_link_libraries(tflite2circle foder)
 target_link_libraries(tflite2circle safemain)
-target_link_libraries(tflite2circle mio_tflite)
+target_link_libraries(tflite2circle mio_tflite260)
 target_link_libraries(tflite2circle mio_circle)
 target_link_libraries(tflite2circle vconone)
 target_link_libraries(tflite2circle nncc_coverage)
index fc7ee40..4015631 100644 (file)
@@ -70,9 +70,9 @@ int entry(int argc, char **argv)
   std::string circle_path = arser.get<std::string>("circle");
   // read tflite file
   tflite2circle::TFLModel tfl_model(tfl_path);
-  if (!tfl_model.is_valid())
+  if (not tfl_model.verify_data())
   {
-    std::cerr << "ERROR: Failed to load tflite '" << tfl_path << "'" << std::endl;
+    std::cerr << "ERROR: Failed to verify tflite '" << tfl_path << "'" << std::endl;
     return 255;
   }
 
@@ -80,7 +80,7 @@ int entry(int argc, char **argv)
   auto flatbuffer_builder = std::make_unique<flatbuffers::FlatBufferBuilder>(1024);
 
   // convert tflite to circle
-  tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model};
+  tflite2circle::CircleModel circle_model{flatbuffer_builder, tfl_model.get_model()};
 
   std::ofstream outfile{circle_path, std::ios::binary};
 
index e1e35d8..14c4f1c 100644 (file)
@@ -63,12 +63,17 @@ private:
 
 public:
   Offset(void) = delete;
-  Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
+  Offset(FlatBufBuilder &fb) : _fb{fb} {};
+
+public:
+  // TODO use _fb
+  void build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec);
 
 public:
   CIRFlatBufVecOffset offset(void) const { return _circle_flatbuffer_vec_offset; }
 
 private:
+  FlatBufBuilder &_fb;
   CIRFlatBufVecOffset _circle_flatbuffer_vec_offset;
 };
 
@@ -79,7 +84,7 @@ private:
 
 public:
   CircleModel(void) = delete;
-  CircleModel(FlatBufBuilder &fb, TFLModel &tfl_model);
+  CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model);
 
 public:
   void model_build(void) const;
index e53d627..507667b 100644 (file)
@@ -37,15 +37,14 @@ public:
   TFLModel(const std::string &path);
 
 public:
-  bool is_valid(void) { return _valid; }
+  const tflite::Model *get_model(void);
 
-private:
-  const tflite::Model *load_model(void);
+public:
+  bool verify_data(void);
 
 private:
   std::ifstream _infile;
   DataBuffer _data;
-  bool _valid;
 
   friend class CircleModel;
 };
index 837c287..e39f9ee 100644 (file)
@@ -1,5 +1,6 @@
 require("arser")
-require("mio-tflite")
+require("foder")
+require("mio-tflite260")
 require("mio-circle")
 require("safemain")
 require("vconone")
index ab0b5b5..4249f15 100644 (file)
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cassert>
 #include <iostream>
 #include <memory>
 
@@ -24,7 +25,8 @@ namespace tflite2circle
 {
 
 template <>
-Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<MetaDataBufferLink>::build(FlatBufBuilder &fb,
+                                       const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   if (tflite_flatbuffer_vec == nullptr)
     return;
@@ -34,7 +36,7 @@ Offset<MetaDataBufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tfli
 }
 
 template <>
-Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<BufferLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::Buffer>> buffers_vec;
 
@@ -55,7 +57,7 @@ Offset<BufferLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatb
 }
 
 template <>
-Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<SubGraphLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::SubGraph>> subgprahs_vec;
 
@@ -278,8 +280,19 @@ Offset<SubGraphLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_fla
   _circle_flatbuffer_vec_offset = fb->CreateVector(subgprahs_vec);
 }
 
+tflite::BuiltinOperator builtin_code_neutral(const tflite::OperatorCode *opcode)
+{
+  assert(opcode != nullptr);
+  int8_t dp_code = opcode->deprecated_builtin_code();
+  // 127 is max of int8_t which is upper bound of v3 builtin_code
+  // NOTE TensorFlow uses 'BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES' for 127
+  if (dp_code < 127 && dp_code >= 0)
+    return tflite::BuiltinOperator(dp_code);
+  return opcode->builtin_code();
+}
+
 template <>
-Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
+void Offset<OperatorCodeLink>::build(FlatBufBuilder &fb, const TFLFlatBufVec *tflite_flatbuffer_vec)
 {
   std::vector<flatbuffers::Offset<circle::OperatorCode>> operator_code_vec;
 
@@ -287,7 +300,9 @@ Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
   {
     auto custom_code = fb->CreateString(it->custom_code());
     circle::OperatorCodeBuilder operator_code_builder{*fb};
-    operator_code_builder.add_builtin_code(get_circle_builtin_code(it->builtin_code()));
+    // TODO support circle deprecated_builtin_code
+    auto bt_code = builtin_code_neutral(it);
+    operator_code_builder.add_builtin_code(get_circle_builtin_code(bt_code));
     operator_code_builder.add_custom_code(custom_code);
     operator_code_builder.add_version(it->version());
     auto code = operator_code_builder.Finish();
@@ -296,24 +311,19 @@ Offset<OperatorCodeLink>::Offset(FlatBufBuilder &fb, const TFLFlatBufVec *tflite
   _circle_flatbuffer_vec_offset = fb->CreateVector(operator_code_vec);
 }
 
-CircleModel::CircleModel(FlatBufBuilder &fb, TFLModel &model)
-  : _version{0}, _description{fb->CreateString("nnpackage")}, _fb{fb}
+CircleModel::CircleModel(FlatBufBuilder &fb, const tflite::Model *tfl_model)
+  : _version{0}, _description{fb->CreateString("ONE-tflite2circle")}, _fb{fb}
 {
-  const tflite::Model *tfl_model = model.load_model();
-  // verify flatbuffers
-  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(model._data.data()),
-                                 model._data.size()};
-  if (!tflite::VerifyModelBuffer(verifier))
-  {
-    throw std::runtime_error("Failed to verify tflite");
-  }
+  _operator_codes_offset = std::make_unique<Offset<OperatorCodeLink>>(fb);
+  _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb);
+  _buffers_offset = std::make_unique<Offset<BufferLink>>(fb);
+  _metadata_buffer_offset = std::make_unique<Offset<MetaDataBufferLink>>(fb);
+
+  _operator_codes_offset->build(fb, tfl_model->operator_codes());
+  _subGraphs_offset->build(fb, tfl_model->subgraphs());
+  _buffers_offset->build(fb, tfl_model->buffers());
+  _metadata_buffer_offset->build(fb, tfl_model->metadata_buffer());
 
-  _operator_codes_offset =
-    std::make_unique<Offset<OperatorCodeLink>>(fb, tfl_model->operator_codes());
-  _subGraphs_offset = std::make_unique<Offset<SubGraphLink>>(fb, tfl_model->subgraphs());
-  _buffers_offset = std::make_unique<Offset<BufferLink>>(fb, tfl_model->buffers());
-  _metadata_buffer_offset =
-    std::make_unique<Offset<MetaDataBufferLink>>(fb, tfl_model->metadata_buffer());
   model_build();
 }
 
index 33f11fb..470b1ae 100644 (file)
@@ -16,6 +16,8 @@
 
 #include <iostream>
 
+#include <foder/FileLoader.h>
+
 #include "TFLModel.h"
 
 namespace tflite2circle
@@ -23,21 +25,21 @@ namespace tflite2circle
 
 TFLModel::TFLModel(const std::string &path)
 {
-  _infile.open(path, std::ios::binary | std::ios::in);
-  _valid = _infile.good();
+  foder::FileLoader file_loader{path};
+  _data = file_loader.load();
 }
 
-const tflite::Model *TFLModel::load_model(void)
+bool TFLModel::verify_data(void)
 {
-  assert(_valid == true);
-  _infile.seekg(0, std::ios::end);
-  auto fileSize = _infile.tellg();
-  _infile.seekg(0, std::ios::beg);
-  _data.resize(fileSize);
-  _infile.read(_data.data(), fileSize);
-  _infile.close();
-
-  return tflite::GetModel(_data.data());
+  // verify flatbuffers
+  flatbuffers::Verifier verifier{reinterpret_cast<const uint8_t *>(_data.data()), _data.size()};
+  if (not tflite::VerifyModelBuffer(verifier))
+  {
+    return false;
+  }
+  return true;
 }
 
+const tflite::Model *TFLModel::get_model(void) { return tflite::GetModel(_data.data()); }
+
 } // namespace tflite2circle
index 1cf7c0c..50ee052 100644 (file)
@@ -1,5 +1,5 @@
 if (NOT VCONONE_VERSION)
-  set(VCONONE_VERSION 0x0000000100110000)
+  set(VCONONE_VERSION 0x0000000000120001)
   # NOTE order is [build patch minor major]
   # if VCONONE_VERSION is set with -D option, it will be cached
   # you may have to remove cache file if you remove -D option
index 1a180a3..e15dc26 100644 (file)
@@ -83,10 +83,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
   {"topkv2_find_first_negative", "topkv2.cl"},
   {"topkv2_reorder_negatives", "topkv2.cl"},
   {"topkv2_store", "topkv2.cl"},
-  {"radixsort_histogram", "topkv2_radixsort.cl"},
-  {"radixsort_scanhistograms", "topkv2_radixsort.cl"},
-  {"radixsort_pastehistograms", "topkv2_radixsort.cl"},
-  {"radixsort_reorder", "topkv2_radixsort.cl"},
   {"topkv2_quicksort", "topkv2_quicksort.cl"},
   {"scale_factor_symm8", "scale_factor.cl"},
 };
@@ -186,10 +182,6 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
 #include "./cl_kernels/topkv2.clembed"
   },
   {
-    "topkv2_radixsort.cl",
-#include "./cl_kernels/topkv2_radixsort.clembed"
-  },
-  {
     "topkv2_quicksort.cl",
 #include "./cl_kernels/topkv2_quicksort.clembed"
   },
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
deleted file mode 100644 (file)
index e9d4696..0000000
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// reference:
-// https://code.google.com/archive/p/ocl-radix-sort/source/default/source
-// OpenCL kernel sources for the CLRadixSort class
-// the #include does not exist in OpenCL
-// Copyright Philippe Helluy, Université de Strasbourg, France, 2011, helluy@math.unistra.fr
-// licensed under the GNU Lesser General Public License see http://www.gnu.org/copyleft/lesser.html
-// if you find this software usefull you can cite the following work in your reports or articles:
-// Philippe HELLUY, A portable implementation of the radix sort algorithm in OpenCL, 2011.
-// http://hal.archives-ouvertes.fr/hal-00596730
-
-// Reference for floating point radix sort:
-// http://www.codercorner.com/RadixSortRevisited.htm
-
-// compute the histogram for each radix and each virtual processor for the pass
-__kernel void radixsort_histogram(__global float *in_key_buf, __global int *d_Histograms,
-                                  const int pass, __local int *loc_histo, const int n)
-{
-  int it = get_local_id(0);  // i local number of the processor
-  int ig = get_global_id(0); // global number = i + g I
-
-  int gr = get_group_id(0); // g group number
-
-  int groups = get_num_groups(0);
-  int items = get_local_size(0);
-
-  // set the local histograms to zero
-  for (int ir = 0; ir < _RADIX; ir++)
-  {
-    loc_histo[ir * items + it] = 0;
-  }
-
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  // range of keys that are analyzed by the work item
-  int size = n / groups / items; // size of the sub-list
-  int start = ig * size;         // beginning of the sub-list
-
-  unsigned int key;
-  int shortkey, k;
-
-  // compute the index
-  // the computation depends on the transposition
-  for (int j = 0; j < size; j++)
-  {
-#ifdef TRANSPOSE
-    k = groups * items * j + ig;
-#else
-    k = j + start;
-#endif
-
-    key = *((__global unsigned int *)(in_key_buf + k));
-
-    // extract the group of _BITS bits of the pass
-    // the result is in the range 0.._RADIX-1
-    shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));
-
-    // increment the local histogram
-    loc_histo[shortkey * items + it]++;
-  }
-
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  // copy the local histogram to the global one
-  for (int ir = 0; ir < _RADIX; ir++)
-  {
-    d_Histograms[items * (ir * groups + gr) + it] = loc_histo[ir * items + it];
-  }
-
-  barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// initial transpose of the list for improving
-// coalescent memory access
-__kernel void transpose(const __global int *invect, __global int *outvect, const int nbcol,
-                        const int nbrow, const __global int *inperm, __global int *outperm,
-                        __local int *blockmat, __local int *blockperm, const int tilesize)
-{
-
-  int i0 = get_global_id(0) * tilesize; // first row index
-  int j = get_global_id(1);             // column index
-
-  int jloc = get_local_id(1); // local column index
-
-  // fill the cache
-  for (int iloc = 0; iloc < tilesize; iloc++)
-  {
-    int k = (i0 + iloc) * nbcol + j; // position in the matrix
-    blockmat[iloc * tilesize + jloc] = invect[k];
-#ifdef PERMUT
-    blockperm[iloc * tilesize + jloc] = inperm[k];
-#endif
-  }
-
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  // first row index in the transpose
-  int j0 = get_group_id(1) * tilesize;
-
-  // put the cache at the good place
-  for (int iloc = 0; iloc < tilesize; iloc++)
-  {
-    int kt = (j0 + iloc) * nbrow + i0 + jloc; // position in the transpose
-    outvect[kt] = blockmat[jloc * tilesize + iloc];
-#ifdef PERMUT
-    outperm[kt] = blockperm[jloc * tilesize + iloc];
-#endif
-  }
-}
-
-// each virtual processor reorders its data using the scanned histogram
-__kernel void radixsort_reorder(__global float *in_key, __global float *out_key,
-                                __global int *d_Histograms, const int pass,
-                                __global int *indices_in, __global int *indices_out,
-                                __local int *loc_histo, const int n)
-{
-
-  int it = get_local_id(0);
-  int ig = get_global_id(0);
-
-  int gr = get_group_id(0);
-  int groups = get_num_groups(0);
-  int items = get_local_size(0);
-
-  int start = ig * (n / groups / items);
-  int size = n / groups / items;
-
-  // take the histogram in the cache
-  for (int ir = 0; ir < _RADIX; ir++)
-  {
-    loc_histo[ir * items + it] = d_Histograms[items * (ir * groups + gr) + it];
-  }
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  int newpos, shortkey, k, newpost;
-  unsigned int key;
-
-  for (int j = 0; j < size; j++)
-  {
-#ifdef TRANSPOSE
-    k = groups * items * j + ig;
-#else
-    k = j + start;
-#endif
-    float org_value = in_key[k];
-    key = *(__global unsigned int *)(in_key + k);
-    shortkey = ((key >> (pass * _BITS)) & (_RADIX - 1));
-
-    newpos = loc_histo[shortkey * items + it];
-
-#ifdef TRANSPOSE
-    int ignew, jnew;
-    ignew = newpos / (n / groups / items);
-    jnew = newpos % (n / groups / items);
-    newpost = jnew * (groups * items) + ignew;
-#else
-    newpost = newpos;
-#endif
-
-    // d_outKeys[newpost]= key;  // killing line !!!
-    out_key[newpost] = org_value;
-
-#ifdef PERMUT
-    indices_out[newpost] = indices_in[k];
-#endif
-
-    newpos++;
-    loc_histo[shortkey * items + it] = newpos;
-  }
-}
-
-// perform a parallel prefix sum (a scan) on the local histograms
-// (see Blelloch 1990) each workitem worries about two memories
-// see also http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html
-__kernel void radixsort_scanhistograms(__global int *histo, __local int *temp,
-                                       __global int *globsum)
-{
-  int it = get_local_id(0);
-  int ig = get_global_id(0);
-  int decale = 1;
-  int n = get_local_size(0) * 2;
-  int gr = get_group_id(0);
-
-  // load input into local memory
-  // up sweep phase
-  temp[2 * it] = histo[2 * ig];
-  temp[2 * it + 1] = histo[2 * ig + 1];
-
-  // parallel prefix sum (algorithm of Blelloch 1990)
-  for (int d = n >> 1; d > 0; d >>= 1)
-  {
-    barrier(CLK_LOCAL_MEM_FENCE);
-    if (it < d)
-    {
-      int ai = decale * (2 * it + 1) - 1;
-      int bi = decale * (2 * it + 2) - 1;
-      temp[bi] += temp[ai];
-    }
-    decale *= 2;
-  }
-
-  // store the last element in the global sum vector
-  // (maybe used in the next step for constructing the global scan)
-  // clear the last element
-  if (it == 0)
-  {
-    globsum[gr] = temp[n - 1];
-    temp[n - 1] = 0;
-  }
-
-  // down sweep phase
-  for (int d = 1; d < n; d *= 2)
-  {
-    decale >>= 1;
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    if (it < d)
-    {
-      int ai = decale * (2 * it + 1) - 1;
-      int bi = decale * (2 * it + 2) - 1;
-
-      int t = temp[ai];
-      temp[ai] = temp[bi];
-      temp[bi] += t;
-    }
-  }
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  // write results to device memory
-
-  histo[2 * ig] = temp[2 * it];
-  histo[2 * ig + 1] = temp[2 * it + 1];
-
-  barrier(CLK_GLOBAL_MEM_FENCE);
-}
-
-// use the global sum for updating the local histograms
-// each work item updates two values
-__kernel void radixsort_pastehistograms(__global int *histo, __global int *globsum)
-{
-  int ig = get_global_id(0);
-  int gr = get_group_id(0);
-
-  int s;
-
-  s = globsum[gr];
-
-  // write results to device memory
-  histo[2 * ig] += s;
-  histo[2 * ig + 1] += s;
-
-  barrier(CLK_GLOBAL_MEM_FENCE);
-}
index ea17db0..b59cab8 100644 (file)
@@ -21,7 +21,7 @@ copyright = '2020, Samsung Research & contributors'
 author = 'Samsung Research & contributors'
 
 # The full version, including alpha/beta/rc tags
-release = '1.17.0'
+release = '1.18.0'
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/docs/release/1.18/index.rst b/docs/release/1.18/index.rst
new file mode 100644 (file)
index 0000000..71c4658
--- /dev/null
@@ -0,0 +1,13 @@
+.. ONE documentation master file, created by
+   sphinx-quickstart on Fri Oct 20 15:20:12 2021.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+1.18
+====
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+  ./release-note-1.18.0.md
diff --git a/docs/release/1.18/release-note-1.18.0.md b/docs/release/1.18/release-note-1.18.0.md
new file mode 100644 (file)
index 0000000..a10f10e
--- /dev/null
@@ -0,0 +1,11 @@
+# Release Note 1.18.0
+
+## ONE Compiler
+
+### Compiler Frontend
+
+- More optimization pass
+  - Fold DepthwiseConv2D
+  - Substitute SplitV to Split
+  - Expand BroadCast Const
+  - Force QuantParam
index 0bfbaa3..c8ca575 100644 (file)
@@ -103,7 +103,13 @@ function(ExternalSource_Download PREFIX)
 
     message(STATUS "Extract ${PREFIX}")
     execute_process(COMMAND ${CMAKE_COMMAND} -E tar xfz "${DOWNLOAD_PATH}"
-                    WORKING_DIRECTORY "${TMP_DIR}")
+                    WORKING_DIRECTORY "${TMP_DIR}"
+                    ERROR_VARIABLE EXTRACTION_ERROR)
+
+    if(EXTRACTION_ERROR)
+      message(FATAL_ERROR "Extract ${PREFIX} - failed")
+    endif(EXTRACTION_ERROR)
+
     file(REMOVE "${DOWNLOAD_PATH}")
     message(STATUS "Extract ${PREFIX} - done")
 
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfig.cmake
new file mode 100644 (file)
index 0000000..b48239f
--- /dev/null
@@ -0,0 +1,13 @@
+function(_CMSISSource_import)
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  envoption(CMSIS_5_8_0_URL https://github.com/ARM-software/CMSIS_5/archive/refs/tags/5.8.0.tar.gz)
+
+  ExternalSource_Download(CMSIS DIRNAME CMSIS-5.8.0 ${CMSIS_5_8_0_URL})
+
+  set(CMSISSource_DIR ${CMSIS_SOURCE_DIR} PARENT_SCOPE)
+  set(CMSISSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_CMSISSource_import)
+
+_CMSISSource_import()
diff --git a/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake b/infra/cmake/packages/CMSISSource-5.8.0/CMSISSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..ca6f782
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "5.8.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfig.cmake
new file mode 100644 (file)
index 0000000..0eb8eb9
--- /dev/null
@@ -0,0 +1,118 @@
+function(_FlatBuffers_import)
+  find_package(Flatbuffers QUIET)
+  set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
+endfunction(_FlatBuffers_import)
+
+function(_FlatBuffers_build)
+  if(NOT BUILD_FLATBUFFERS)
+    message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
+    return()
+  endif(NOT BUILD_FLATBUFFERS)
+
+  nnas_find_package(FlatBuffersSource EXACT 1.10 QUIET)
+
+  if(NOT FlatBuffersSource_FOUND)
+    # Source is not available
+    message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
+    return()
+  endif(NOT FlatBuffersSource_FOUND)
+
+  set(ADDITIONAL_CXX_FLAGS "")
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
+    set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
+  endif()
+
+  nnas_include(ExternalBuildTools)
+  ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
+                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
+                      INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
+                      BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
+                      IDENTIFIER  "1.10-fix4"
+                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+                      PKG_NAME    "FLATBUFFERS-1.10")
+
+endfunction(_FlatBuffers_build)
+
+_FlatBuffers_build()
+_FlatBuffers_import()
+
+if(FlatBuffers_FOUND)
+  if(NOT TARGET flatbuffers-1.10)
+    add_library(flatbuffers-1.10 INTERFACE)
+    target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
+    message(STATUS "Found FlatBuffers-1.10: TRUE")
+  endif(NOT TARGET flatbuffers-1.10)
+
+  function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
+    get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
+    get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
+
+    foreach(schema ${ARGN})
+      get_filename_component(schema_fn "${schema}" NAME)
+      get_filename_component(dir "${schema}" DIRECTORY)
+
+      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+    endforeach()
+
+    add_custom_command(OUTPUT ${OUTPUT_FILES}
+                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+                       --no-union-value-namespacing
+                       --gen-object-api -o "${abs_output_dir}"
+                       ${SCHEMA_FILES}
+                       DEPENDS flatbuffers::flatc)
+
+    set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
+    set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
+  endfunction(FlatBuffers_Generate)
+
+  function(FlatBuffers_Target TGT)
+    set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
+    set(multiValueArgs SCHEMA_FILES)
+    cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
+    if(NOT ARG_INCLUDE_DIR)
+      set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
+    endif(NOT ARG_INCLUDE_DIR)
+
+    get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
+    get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
+    get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
+
+    # Let's reset list variables before using them
+    # NOTE THIS DOES NOT AFFECT parent scope
+    unset(SCHEMA_FILES)
+    unset(OUTPUT_FILES)
+
+    foreach(schema ${ARG_SCHEMA_FILES})
+      get_filename_component(schema_fn "${schema}" NAME)
+      get_filename_component(dir "${schema}" DIRECTORY)
+
+      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+    endforeach()
+
+    # Generate headers
+    add_custom_command(OUTPUT ${OUTPUT_FILES}
+                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+                               --no-union-value-namespacing
+                               --gen-object-api -o "${abs_output_dir}"
+                               ${SCHEMA_FILES}
+                       DEPENDS ${SCHEMA_FILES}
+                       COMMENT "Generate '${TGT}' headers")
+
+    # NOTE This header-only library is deliberately declared as STATIC library
+    #      to avoid possible scope issues related with generated files
+    add_library(${TGT} STATIC ${OUTPUT_FILES})
+    set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+    target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
+    target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
+  endfunction(FlatBuffers_Target)
+endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.10/FlatBuffersConfigVersion.cmake
new file mode 100644 (file)
index 0000000..6585f21
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "1.10")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfig.cmake
new file mode 100644 (file)
index 0000000..daa749c
--- /dev/null
@@ -0,0 +1,118 @@
+function(_FlatBuffers_import)
+  find_package(Flatbuffers QUIET)
+  set(FlatBuffers_FOUND ${Flatbuffers_FOUND} PARENT_SCOPE)
+endfunction(_FlatBuffers_import)
+
+function(_FlatBuffers_build)
+  if(NOT BUILD_FLATBUFFERS)
+    message(STATUS "FlatBuffersConfig !BUILD_FLATBUFFERS")
+    return()
+  endif(NOT BUILD_FLATBUFFERS)
+
+  nnas_find_package(FlatBuffersSource EXACT 1.12 QUIET)
+
+  if(NOT FlatBuffersSource_FOUND)
+    # Source is not available
+    message(STATUS "FlatBuffersConfig !FlatBuffersSource_FOUND")
+    return()
+  endif(NOT FlatBuffersSource_FOUND)
+
+  set(ADDITIONAL_CXX_FLAGS "")
+  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
+    set(ADDITIONAL_CXX_FLAGS "-Wno-error=class-memaccess")
+  endif()
+
+  nnas_include(ExternalBuildTools)
+  ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
+                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.12/build
+                      INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.12
+                      BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
+                      IDENTIFIER  "1.12-fix1"
+                      EXTRA_OPTS  "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+                      PKG_NAME    "FLATBUFFERS-1.12")
+
+endfunction(_FlatBuffers_build)
+
+_FlatBuffers_build()
+_FlatBuffers_import()
+
+if(FlatBuffers_FOUND)
+  if(NOT TARGET flatbuffers-1.12)
+    add_library(flatbuffers-1.12 INTERFACE)
+    target_link_libraries(flatbuffers-1.12 INTERFACE flatbuffers::flatbuffers)
+    message(STATUS "Found FlatBuffers-1.12: TRUE")
+  endif(NOT TARGET flatbuffers-1.12)
+
+  function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
+    get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
+    get_filename_component(abs_schema_dir ${SCHEMA_DIR} ABSOLUTE)
+
+    foreach(schema ${ARGN})
+      get_filename_component(schema_fn "${schema}" NAME)
+      get_filename_component(dir "${schema}" DIRECTORY)
+
+      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+    endforeach()
+
+    add_custom_command(OUTPUT ${OUTPUT_FILES}
+                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+                       --no-union-value-namespacing
+                       --gen-object-api -o "${abs_output_dir}"
+                       ${SCHEMA_FILES}
+                       DEPENDS flatbuffers::flatc)
+
+    set(${PREFIX}_SOURCES ${OUTPUT_FILES} PARENT_SCOPE)
+    set(${PREFIX}_INCLUDE_DIRS ${abs_output_dir} PARENT_SCOPE)
+  endfunction(FlatBuffers_Generate)
+
+  function(FlatBuffers_Target TGT)
+    set(oneValueArgs OUTPUT_DIR SCHEMA_DIR INCLUDE_DIR)
+    set(multiValueArgs SCHEMA_FILES)
+    cmake_parse_arguments(ARG "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    # Use OUTPUT_DIR as INCLUDE_DIR if INCLUDE_DIR is not specified
+    if(NOT ARG_INCLUDE_DIR)
+      set(ARG_INCLUDE_DIR ${ARG_OUTPUT_DIR})
+    endif(NOT ARG_INCLUDE_DIR)
+
+    get_filename_component(abs_output_dir ${ARG_OUTPUT_DIR} ABSOLUTE)
+    get_filename_component(abs_include_dir ${ARG_INCLUDE_DIR} ABSOLUTE)
+    get_filename_component(abs_schema_dir ${ARG_SCHEMA_DIR} ABSOLUTE)
+
+    # Let's reset list variables before using them
+    # NOTE THIS DOES NOT AFFECT parent scope
+    unset(SCHEMA_FILES)
+    unset(OUTPUT_FILES)
+
+    foreach(schema ${ARG_SCHEMA_FILES})
+      get_filename_component(schema_fn "${schema}" NAME)
+      get_filename_component(dir "${schema}" DIRECTORY)
+
+      get_filename_component(schema_fn_we "${schema_fn}" NAME_WE)
+
+      list(APPEND SCHEMA_FILES "${abs_schema_dir}/${schema}")
+      list(APPEND OUTPUT_FILES "${abs_output_dir}/${schema_fn_we}_generated.h")
+    endforeach()
+
+    # Generate headers
+    add_custom_command(OUTPUT ${OUTPUT_FILES}
+                       COMMAND ${CMAKE_COMMAND} -E make_directory "${abs_output_dir}"
+                       COMMAND "$<TARGET_FILE:flatbuffers::flatc>" -c --no-includes
+                               --no-union-value-namespacing
+                               --gen-object-api -o "${abs_output_dir}"
+                               ${SCHEMA_FILES}
+                       DEPENDS ${SCHEMA_FILES}
+                       COMMENT "Generate '${TGT}' headers")
+
+    # NOTE This header-only library is deliberately declared as STATIC library
+    #      to avoid possible scope issues related with generated files
+    add_library(${TGT} STATIC ${OUTPUT_FILES})
+    set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
+    target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
+    target_link_libraries(${TGT} PUBLIC flatbuffers-1.12)
+  endfunction(FlatBuffers_Target)
+endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake b/infra/cmake/packages/FlatBuffers-1.12/FlatBuffersConfigVersion.cmake
new file mode 100644 (file)
index 0000000..8cfdbf8
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "1.12")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
index da084e7..e551e29 100644 (file)
@@ -5,6 +5,7 @@ endfunction(_FlatBuffers_import)
 
 function(_FlatBuffers_build)
   if(NOT BUILD_FLATBUFFERS)
+    message(STATUS "FlatBuffersConfig skip: BUILD_FLATBUFFERS OFF")
     return()
   endif(NOT BUILD_FLATBUFFERS)
 
@@ -12,6 +13,7 @@ function(_FlatBuffers_build)
 
   if(NOT FlatBuffersSource_FOUND)
     # Source is not available
+    message(STATUS "FlatBuffersConfig skip: FlatBuffersSource not found")
     return()
   endif(NOT FlatBuffersSource_FOUND)
 
@@ -22,12 +24,12 @@ function(_FlatBuffers_build)
 
   nnas_include(ExternalBuildTools)
   ExternalBuild_CMake(CMAKE_DIR   ${FlatBuffersSource_DIR}
-                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS/build
-                      INSTALL_DIR ${EXT_OVERLAY_DIR}
+                      BUILD_DIR   ${CMAKE_BINARY_DIR}/externals/FLATBUFFERS-1.10/build
+                      INSTALL_DIR ${EXT_OVERLAY_DIR}/FLATBUFFERS-1.10
                       BUILD_FLAGS ${ADDITIONAL_CXX_FLAGS}
-                      IDENTIFIER  "1.10-fix2"
-                      EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF"
-                      PKG_NAME    "FLATBUFFERS")
+                      IDENTIFIER  "1.10-fix4"
+                      EXTRA_OPTS "-DFLATBUFFERS_BUILD_TESTS:BOOL=OFF -DPOSITION_INDEPENDENT_CODE:BOOL=ON"
+                      PKG_NAME    "FLATBUFFERS-1.10")
 
 endfunction(_FlatBuffers_build)
 
@@ -35,11 +37,11 @@ _FlatBuffers_build()
 _FlatBuffers_import()
 
 if(FlatBuffers_FOUND)
-  if(NOT TARGET flatbuffers)
-    add_library(flatbuffers INTERFACE)
-    target_link_libraries(flatbuffers INTERFACE flatbuffers::flatbuffers)
-    message(STATUS "Found FlatBuffers: TRUE")
-  endif(NOT TARGET flatbuffers)
+  if(NOT TARGET flatbuffers-1.10)
+    add_library(flatbuffers-1.10 INTERFACE)
+    target_link_libraries(flatbuffers-1.10 INTERFACE flatbuffers::flatbuffers)
+    message(STATUS "Found FlatBuffers-1.10: TRUE")
+  endif(NOT TARGET flatbuffers-1.10)
 
   function(FlatBuffers_Generate PREFIX OUTPUT_DIR SCHEMA_DIR)
     get_filename_component(abs_output_dir ${OUTPUT_DIR} ABSOLUTE)
@@ -111,6 +113,6 @@ if(FlatBuffers_FOUND)
     add_library(${TGT} STATIC ${OUTPUT_FILES})
     set_target_properties(${TGT} PROPERTIES LINKER_LANGUAGE CXX)
     target_include_directories(${TGT} PUBLIC "${ARG_INCLUDE_DIR}")
-    target_link_libraries(${TGT} PUBLIC flatbuffers)
+    target_link_libraries(${TGT} PUBLIC flatbuffers-1.10)
   endfunction(FlatBuffers_Target)
 endif(FlatBuffers_FOUND)
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfig.cmake
new file mode 100644 (file)
index 0000000..8b17430
--- /dev/null
@@ -0,0 +1,13 @@
+function(_MbedOSSource_import)
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  envoption(MBEDOS_6_15_URL https://github.com/ARMmbed/mbed-os/archive/refs/tags/mbed-os-6.15.0.tar.gz)
+
+  ExternalSource_Download(MBEDOS DIRNAME MBEDOS-6.15 ${MBEDOS_6_15_URL})
+
+  set(MbedOSSource_DIR ${MBEDOS_SOURCE_DIR} PARENT_SCOPE)
+  set(MbedOSSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_MbedOSSource_import)
+
+_MbedOSSource_import()
diff --git a/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake b/infra/cmake/packages/MbedOSSource-6.15/MbedOSSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..acdd54a
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "6.15")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfig.cmake
new file mode 100644 (file)
index 0000000..a9ec75d
--- /dev/null
@@ -0,0 +1,21 @@
+function(_TensorFlowEigenSource_import)
+  if(NOT DOWNLOAD_EIGEN)
+    set(TensorFlowEigenSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_EIGEN)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  # Exact version used by TensorFlow v2.6.0.
+  # See tensorflow/third_party/eigen3/workspace.bzl.
+  envoption(EXTERNAL_DOWNLOAD_SERVER "https://gitlab.com")
+  envoption(TENSORFLOW_2_6_0_EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/libeigen/eigen/-/archive/12e8d57108c50d8a63605c6eb0144c838c128337/eigen-12e8d57108c50d8a63605c6eb0144c838c128337.tar.gz)
+
+  ExternalSource_Download(EIGEN DIRNAME TENSORFLOW-2.6.0-EIGEN ${TENSORFLOW_2_6_0_EIGEN_URL})
+
+  set(TensorFlowEigenSource_DIR ${EIGEN_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowEigenSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowEigenSource_import)
+
+_TensorFlowEigenSource_import()
diff --git a/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowEigenSource-2.6.0/TensorFlowEigenSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..38ad0aa
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfig.cmake
new file mode 100644 (file)
index 0000000..b7f3148
--- /dev/null
@@ -0,0 +1,20 @@
+function(_TensorFlowGEMMLowpSource_import)
+  if(NOT DOWNLOAD_GEMMLOWP)
+    set(TensorFlowGEMMLowpSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_GEMMLOWP)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  # Exact version used by TensorFlow v2.6.0.
+  # See tensorflow/third_party/gemmlowp/workspace.bzl.
+  envoption(TENSORFLOW_2_6_0_GEMMLOWP_URL https://github.com/google/gemmlowp/archive/fda83bdc38b118cc6b56753bd540caa49e570745.zip)
+
+  ExternalSource_Download(GEMMLOWP DIRNAME TENSORFLOW-2.6.0-GEMMLOWP ${TENSORFLOW_2_6_0_GEMMLOWP_URL})
+
+  set(TensorFlowGEMMLowpSource_DIR ${GEMMLOWP_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowGEMMLowpSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowGEMMLowpSource_import)
+
+_TensorFlowGEMMLowpSource_import()
diff --git a/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowGEMMLowpSource-2.6.0/TensorFlowGEMMLowpSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..38ad0aa
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfig.cmake
new file mode 100644 (file)
index 0000000..b4dee91
--- /dev/null
@@ -0,0 +1,20 @@
+function(_TensorFlowRuySource_import)
+  if(NOT DOWNLOAD_RUY)
+    set(TensorFlowRuySource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_RUY)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  # Exact version used by TensorFlow v2.6.0.
+  # See tensorflow/third_party/ruy/workspace.bzl
+  envoption(TENSORFLOW_2_6_0_RUY_URL https://github.com/google/ruy/archive/e6c1b8dc8a8b00ee74e7268aac8b18d7260ab1ce.zip)
+
+  ExternalSource_Download(RUY DIRNAME TENSORFLOW-2.6.0-RUY ${TENSORFLOW_2_6_0_RUY_URL})
+
+  set(TensorFlowRuySource_DIR ${RUY_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowRuySource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowRuySource_import)
+
+_TensorFlowRuySource_import()
diff --git a/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowRuySource-2.6.0/TensorFlowRuySourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..38ad0aa
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfig.cmake
new file mode 100644 (file)
index 0000000..611c7c8
--- /dev/null
@@ -0,0 +1,18 @@
+function(_TensorFlowSource_import)
+  if(NOT DOWNLOAD_TENSORFLOW)
+    set(TensorFlowSource_FOUND FALSE PARENT_SCOPE)
+    return()
+  endif(NOT DOWNLOAD_TENSORFLOW)
+
+  nnas_include(ExternalSourceTools)
+  nnas_include(OptionTools)
+
+  envoption(TENSORFLOW_2_6_0_URL https://github.com/tensorflow/tensorflow/archive/v2.6.0.tar.gz)
+
+  ExternalSource_Download(TENSORFLOW DIRNAME TENSORFLOW-2.6.0 ${TENSORFLOW_2_6_0_URL})
+
+  set(TensorFlowSource_DIR ${TENSORFLOW_SOURCE_DIR} PARENT_SCOPE)
+  set(TensorFlowSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_TensorFlowSource_import)
+
+_TensorFlowSource_import()
diff --git a/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake b/infra/cmake/packages/TensorFlowSource-2.6.0/TensorFlowSourceConfigVersion.cmake
new file mode 100644 (file)
index 0000000..38ad0aa
--- /dev/null
@@ -0,0 +1,10 @@
+set(PACKAGE_VERSION "2.6.0")
+set(PACKAGE_VERSION_EXACT FALSE)
+set(PACKAGE_VERSION_COMPATIBLE FALSE)
+set(PACKAGE_VERSION_UNSUITABLE TRUE)
+
+if(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
+  set(PACKAGE_VERSION_EXACT TRUE)
+  set(PACKAGE_VERSION_COMPATIBLE TRUE)
+  set(PACKAGE_VERSION_UNSUITABLE FALSE)
+endif(PACKAGE_FIND_VERSION VERSION_EQUAL PACKAGE_VERSION)
index 6859255..12af5f9 100644 (file)
@@ -1,3 +1,9 @@
+one (1.18.0) bionic; urgency=medium
+
+  * More optimization pass
+
+ -- seongwoo <mhs4670go@naver.com>  Fri, 15 Oct 2021 15:23:20 +0900
+
 one (1.17.0) bionic; urgency=medium
 
   * More optimization pass
index ba62854..cbca478 100644 (file)
@@ -3,7 +3,6 @@
 usr/bin/circle2circle usr/share/one/bin/
 usr/bin/circle_partitioner usr/share/one/bin/
 usr/bin/circle-quantizer usr/share/one/bin/
-usr/bin/conv_mixin_1.8.0.patch usr/share/one/bin/
 usr/bin/generate_bcq_metadata.py usr/share/one/bin/
 usr/bin/generate_bcq_output_arrays.py usr/share/one/bin/
 usr/bin/model2nnpkg.sh usr/share/one/bin/
index 8b6e542..9e46435 100644 (file)
@@ -13,4 +13,5 @@ usr/share/one/lib/libluci_log.so usr/lib/libluci_log.so
 usr/share/one/lib/libluci_partition.so usr/lib/libluci_partition.so
 usr/share/one/lib/libluci_pass.so usr/lib/libluci_pass.so
 usr/share/one/lib/libluci_profile.so usr/lib/libluci_profile.so
+usr/share/one/lib/libluci_plan.so usr/lib/libluci_plan.so
 usr/share/one/lib/libluci_service.so usr/lib/libluci_service.so
index 21b956b..e42faae 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/make -f
 export DH_VERBOSE = 1
 export NNAS_BUILD_PREFIX = build
-export PRESET = 20210706
+export PRESET = 20210910
 export _DESTDIR = debian/tmp/usr
 
 %:
index 4a41d95..ee0d3e6 100644 (file)
@@ -1,3 +1,9 @@
+one (1.18.0) bionic; urgency=low
+
+  * Synch up version with ONE Compiler
+
+ -- Chunseok Lee <chunseok.lee@samsung.com>  Fri, 15 Oct 2021 15:23:00 +0900
+
 one (1.17.0) bionic; urgency=low
 
   * New gpu_gl backend supports the following operations : Add, Convolution, Depthwise Convolution, Pooling, Reshape, Relu, Softmax
index eb27990..bde6849 100644 (file)
@@ -130,6 +130,11 @@ option(ENABLE_STRICT_BUILD "Treat warning as error" OFF)
 # Check our ProtobufConfig.cmake for its usage.
 option(USE_PROTOBUF_LEGACY_IMPORT "Use legacy MODULE mode import rather than CONFIG mode" OFF)
 
+# This option might be turned ON for MCU builds of luci related components.
+# It specifies which library type to use for the build:
+# if set ON - luci libraries are static, otherwise - shared.
+option(STATIC_LUCI "Build luci as static libraries" OFF)
+
 ###
 ### Target
 ###
index 64aaace..65aea8b 100644 (file)
@@ -14,7 +14,7 @@ oops pepper-assert \
 hermes hermes-std \
 loco locop locomotiv logo-core logo \
 foder souschef arser vconone crew \
-safemain mio-circle mio-tflite \
+safemain mio-circle mio-tflite mio-tflite260 \
 tflite2circle \
 luci \
 luci-interpreter \
index 8d32300..53d6371 100644 (file)
@@ -8,7 +8,7 @@ if [[ -z "${NNAS_PROJECT_PATH}" ]]; then
 fi
 
 # The default preset
-PRESET="20210706"
+PRESET="20210910"
 
 EXTRA_OPTIONS=()
 while [ "$#" -ne 0 ]; do
diff --git a/infra/packaging/preset/20210910 b/infra/packaging/preset/20210910
new file mode 100644 (file)
index 0000000..d00b1cc
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# NOTE purpose of this file is static analysis only
+#      new official preset will be added when new programs are ready
+
+PRESET="20210910"
+
+function preset_configure()
+{
+  REQUIRED_UNITS=()
+  # Common Libraries
+  REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+  REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+  REQUIRED_UNITS+=("souschef")
+  REQUIRED_UNITS+=("safemain")
+  REQUIRED_UNITS+=("arser")
+  REQUIRED_UNITS+=("vconone")
+  # Hermes Logging Framework
+  REQUIRED_UNITS+=("hermes" "hermes-std")
+  # loco IR and related utilities
+  REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+  # Flatbuffer I/O
+  REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+  # Circle compiler library (.circle -> .circle)
+  REQUIRED_UNITS+=("luci")
+  # Tools
+  REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+  REQUIRED_UNITS+=("circle-tensordump" "circledump")
+  REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+  REQUIRED_UNITS+=("luci-eval-driver")
+  REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+  REQUIRED_UNITS+=("circle-partitioner")
+  REQUIRED_UNITS+=("one-cmds")
+  REQUIRED_UNITS+=("bcq-tools")
+
+  NPROC=${NPROC:-$(cat /proc/cpuinfo | grep -c processor)}
+
+  # TODO Use "nncc configure" and "nncc build"
+  cmake \
+    -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+    -DCMAKE_BUILD_TYPE=release \
+    -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+    -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+    ${EXTRA_OPTIONS[@]} \
+    "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+  install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+    "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+  # Install tf2nnpkg
+  install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.${PRESET}" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+}
diff --git a/infra/packaging/preset/20210910_windows b/infra/packaging/preset/20210910_windows
new file mode 100644 (file)
index 0000000..642bdbd
--- /dev/null
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+function preset_configure()
+{
+  REQUIRED_UNITS=()
+  # Common Libraries
+  REQUIRED_UNITS+=("angkor" "cwrap" "pepper-str" "pepper-strcast" "pp")
+  REQUIRED_UNITS+=("oops" "pepper-assert" "pepper-csv2vec" "foder" "crew")
+  REQUIRED_UNITS+=("souschef")
+  REQUIRED_UNITS+=("safemain")
+  REQUIRED_UNITS+=("arser")
+  REQUIRED_UNITS+=("vconone")
+  # Hermes Logging Framework
+  REQUIRED_UNITS+=("hermes" "hermes-std")
+  # loco IR and related utilities
+  REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
+  # Flatbuffer I/O
+  REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
+  # Circle compiler library (.circle -> .circle)
+  REQUIRED_UNITS+=("luci")
+  # Tools
+  REQUIRED_UNITS+=("tflite2circle" "circle2circle" "tflchef" "circlechef")
+  REQUIRED_UNITS+=("tf2tfliteV2" "luci-interpreter" "circle-verify")
+  REQUIRED_UNITS+=("luci-eval-driver")
+  REQUIRED_UNITS+=("record-minmax" "circle-quantizer" "rawdata2hdf5")
+  REQUIRED_UNITS+=("circle-partitioner")
+  REQUIRED_UNITS+=("one-cmds")
+  REQUIRED_UNITS+=("bcq-tools")
+
+  NPROC=$(cat /proc/cpuinfo | grep -c processor)
+
+  # TODO Use "nncc configure" and "nncc build"
+  cmake \
+    -G "MSYS Makefiles" \
+    -DUSE_PROTOBUF_LEGACY_IMPORT=ON \
+    -DCMAKE_EXE_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+    -DCMAKE_SHARED_LINKER_FLAGS="-Wl,--allow-multiple-definition" \
+    -DENABLE_TEST=OFF \
+    -DDOWNLOAD_GTEST=OFF \
+    -DBUILD_GTEST=OFF \
+    -DCMAKE_C_COMPILER=gcc \
+    -DCMAKE_CXX_COMPILER=g++ \
+    -DCMAKE_INSTALL_PREFIX="${NNCC_INSTALL_PREFIX}" \
+    -DCMAKE_BUILD_TYPE=release \
+    -DBUILD_WHITELIST=$(join_by ";" "${REQUIRED_UNITS[@]}") \
+    -DEXTERNALS_BUILD_THREADS=$((NPROC/2)) \
+    ${EXTRA_OPTIONS[@]} \
+    "${NNAS_PROJECT_PATH}/infra/nncc"
+}
+
+function preset_install()
+{
+  # Install libraries to bin/ for Windows release
+  mv ${NNCC_INSTALL_PREFIX}/lib/*.dll ${NNCC_INSTALL_PREFIX}/bin
+  rm -rf ${NNCC_INSTALL_PREFIX}/lib
+
+  install -t "${NNPKG_INSTALL_PREFIX}/bin" -D \
+    "${NNAS_PROJECT_PATH}/tools/nnpackage_tool/model2nnpkg/model2nnpkg.sh"
+
+  # Install tf2nnpkg
+  install -T -m 755 -D "${SCRIPT_PATH}/res/tf2nnpkg.20210910" "${NNAS_INSTALL_PREFIX}/bin/tf2nnpkg"
+
+  # Though you have to install tensorflow to run 'tf2tfliteV2',
+  # tensorflow can't be installed in mingw. First, you can install tensorflow
+  # from Window native CMD(run as administrator) with python virtual environment.
+  # And, you must copy it to "${NNAS_INSTALL_PREFIX}/bin/venv"
+}
diff --git a/infra/packaging/res/tf2nnpkg.20210910 b/infra/packaging/res/tf2nnpkg.20210910
new file mode 100644 (file)
index 0000000..0d44818
--- /dev/null
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+set -e
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+
+command_exists() {
+  if [ "$#" -le 0 ]; then
+    return 1
+  fi
+  command -v "$@" > /dev/null 2>&1
+}
+
+usage()
+{
+  echo "Convert TensorFlow model to nnpackage."
+  echo "Usage: tf2nnpkg"
+  echo "    --info <path/to/info>"
+  echo "    --graphdef <path/to/pb>"
+  echo "    -o <path/to/nnpkg/directory>"
+  echo "    --v2 (optional) Use TF 2.x interface"
+  exit 255
+}
+
+TF_INTERFACE="--v1"
+
+# Parse command-line arguments
+#
+while [ "$#" -ne 0 ]; do
+  CUR="$1"
+
+  case $CUR in
+    '--help')
+      usage
+      ;;
+    '--info')
+      export INFO_FILE="$2"
+      shift 2
+      ;;
+    '--graphdef')
+      export GRAPHDEF_FILE="$2"
+      shift 2
+      ;;
+    '-o')
+      export OUTPUT_DIR="$2"
+      shift 2
+      ;;
+    '--v2')
+      TF_INTERFACE="--v2"
+      shift
+      ;;
+    *)
+      echo "${CUR}"
+      shift
+      ;;
+  esac
+done
+
+if [ -z ${GRAPHDEF_FILE} ] || [ ! -e ${GRAPHDEF_FILE} ]; then
+  echo "pb is not found. Please check --graphdef is correct."
+  exit 2
+fi
+
+if [ -z ${INFO_FILE} ] || [ ! -e ${INFO_FILE} ]; then
+  echo "info is not found. Please check --info is correct."
+  exit 2
+fi
+
+if [ -z ${OUTPUT_DIR} ]; then
+  echo "output directory is not specified. Please check -o is correct."
+  exit 2
+fi
+
+FILE_BASE=$(basename ${GRAPHDEF_FILE})
+MODEL_NAME="${FILE_BASE%.*}"
+TMPDIR=$(mktemp -d)
+trap "{ rm -rf $TMPDIR; }" EXIT
+
+# activate python virtual environment
+VIRTUALENV_LINUX="${ROOT}/bin/venv/bin/activate"
+VIRTUALENV_WINDOWS="${ROOT}/bin/venv/Scripts/activate"
+
+if [ -e ${VIRTUALENV_LINUX} ]; then
+  source ${VIRTUALENV_LINUX}
+elif [ -e ${VIRTUALENV_WINDOWS} ]; then
+  source ${VIRTUALENV_WINDOWS}
+fi
+
+# parse inputs, outputs from info file
+INPUT=$(awk -F, '/^input/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+OUTPUT=$(awk -F, '/^output/ { print $2 }' ${INFO_FILE} | cut -d: -f1 | tr -d ' ' | paste -d, -s)
+
+INPUT_SHAPES=$(grep ^input ${INFO_FILE} | cut -d "[" -f2 | cut -d "]" -f1 | tr -d ' ' | xargs | tr ' ' ':')
+
+ONE_IMPORT_BCQ_SCRIPT="${ROOT}/bin/one-import-bcq ${TF_INTERFACE} "
+ONE_IMPORT_BCQ_SCRIPT+="-i ${GRAPHDEF_FILE} "
+ONE_IMPORT_BCQ_SCRIPT+="-o ${TMPDIR}/${MODEL_NAME}.tmp.circle "
+ONE_IMPORT_BCQ_SCRIPT+="-I ${INPUT} "
+ONE_IMPORT_BCQ_SCRIPT+="-O ${OUTPUT} "
+if [ ! -z ${INPUT_SHAPES} ]; then
+  ONE_IMPORT_BCQ_SCRIPT+="-s ${INPUT_SHAPES} "
+fi
+
+${ONE_IMPORT_BCQ_SCRIPT}
+
+# optimize
+"${ROOT}/bin/circle2circle" --O1 "${TMPDIR}/${MODEL_NAME}.tmp.circle" "${TMPDIR}/${MODEL_NAME}.circle"
+
+"${ROOT}/bin/model2nnpkg.sh" -o "${OUTPUT_DIR}" "${TMPDIR}/${MODEL_NAME}.circle"
index a63140e..e520dd3 100644 (file)
@@ -8,7 +8,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec"
 DEBUG_BUILD_ITEMS+=";hermes;hermes-std"
 DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo"
 DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone"
-DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite"
+DEBUG_BUILD_ITEMS+=";safemain;mio-circle;mio-tflite;mio-tflite260"
 DEBUG_BUILD_ITEMS+=";tflite2circle"
 DEBUG_BUILD_ITEMS+=";luci"
 DEBUG_BUILD_ITEMS+=";luci-interpreter"
index 65963f4..475da6d 100755 (executable)
@@ -71,7 +71,7 @@ REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo")
 # Circle compiler library (.circle -> .circle)
 REQUIRED_UNITS+=("luci")
 # Flatbuffer I/O
-REQUIRED_UNITS+=("mio-tflite" "mio-circle")
+REQUIRED_UNITS+=("mio-tflite" "mio-tflite260" "mio-circle")
 # Tools
 REQUIRED_UNITS+=("tflite2circle" "circle2circle" "luci-interpreter")
 REQUIRED_UNITS+=("souschef" "tflchef" "circlechef" "circle-verify")
index 0d170e7..4133d7a 100644 (file)
@@ -1,9 +1,9 @@
 Name:    nnfw
 Summary: nnfw
-Version: 1.17.0
+Version: 1.18.0
 Release: 1
 Group:   Development
-License: Apache-2.0 and MIT and BSD-2-Clause
+License: Apache-2.0 and MIT and BSD-2-Clause and MPL-2.0
 
 Source0: %{name}-%{version}.tar.gz
 Source1: %{name}.manifest
diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.recipe b/res/TensorFlowLiteRecipes/PadV2_001/test.recipe
new file mode 100644 (file)
index 0000000..0eafec9
--- /dev/null
@@ -0,0 +1,68 @@
+operand {
+  name: "ifm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "relu"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operand {
+  name: "padding"
+  type: INT32
+  shape { dim: 4 dim: 2 }
+  filler {
+    tag: "explicit"
+    arg: "0" arg: "0"
+    arg: "1" arg: "1"
+    arg: "1" arg: "1"
+    arg: "0" arg: "0"
+  }
+}
+operand {
+  name: "constant_values"
+  type: FLOAT32
+  shape { dim: 1 }
+  filler {
+    tag: "explicit"
+    arg: "-100.00"
+  }
+}
+operand {
+  name: "padv2"
+  type: FLOAT32
+  shape { dim: 1 dim: 5 dim: 5 dim: 2 }
+}
+operand {
+  name: "ofm"
+  type: FLOAT32
+  shape { dim: 1 dim: 3 dim: 3 dim: 2 }
+}
+operation {
+  type: "ReLU"
+  input: "ifm"
+  output: "relu"
+}
+operation {
+  type: "PadV2"
+  input: "relu"
+  input: "padding"
+  input: "constant_values"
+  output: "padv2"
+}
+operation {
+  type: "MaxPool2D"
+  maxpool2d_options {
+    padding: VALID
+    stride_w: 1
+    stride_h: 1
+    filter_height: 3
+    filter_width: 3
+  }
+  input: "padv2"
+  output: "ofm"
+}
+
+input: "ifm"
+output: "ofm"
diff --git a/res/TensorFlowLiteRecipes/PadV2_001/test.rule b/res/TensorFlowLiteRecipes/PadV2_001/test.rule
new file mode 100644 (file)
index 0000000..29b080b
--- /dev/null
@@ -0,0 +1,8 @@
+# To check if PadV2 is converted to Pad
+
+RULE    "VERIFY_FILE_FORMAT"      $(verify_file_format) '=' 1
+
+RULE    "PAD_EXIST"               $(op_count PAD) '=' 1
+RULE    "MAXPOOL2D_EXIST"         $(op_count MAX_POOL_2D) '=' 1
+RULE    "RELU_EXIST"              $(op_count RELU) '=' 1
+RULE    "NO_PADV2"                $(op_count PADV2) '=' 0
diff --git a/res/TensorFlowLiteSchema/2.6.0/schema.fbs b/res/TensorFlowLiteSchema/2.6.0/schema.fbs
new file mode 100644 (file)
index 0000000..6fc51f8
--- /dev/null
@@ -0,0 +1,1240 @@
+// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Revision History
+// Version 0: Initial version.
+// Version 1: Add subgraphs to schema.
+// Version 2: Rename operators to conform to NN API.
+// Version 3: Move buffer data from Model.Subgraph.Tensors to Model.Buffers.
+// Version 3a: Add new builtin op code field. Has backward compatibility with
+//             version 3.
+
+namespace tflite;
+
+// This corresponds to the version.
+file_identifier "TFL3";
+// File extension of any written files.
+file_extension "tflite";
+
+// IMPORTANT: All new members of tables, enums and unions must be added at the
+// end to ensure backwards compatibility.
+
+// The type of data stored in a tensor.
+enum TensorType : byte {
+  FLOAT32 = 0,
+  FLOAT16 = 1,
+  INT32 = 2,
+  UINT8 = 3,
+  INT64 = 4,
+  STRING = 5,
+  BOOL = 6,
+  INT16 = 7,
+  COMPLEX64 = 8,
+  INT8 = 9,
+  FLOAT64 = 10,
+  COMPLEX128 = 11,
+  UINT64 = 12,
+  // Experimental: Resource and variant types are experimental, that are subject
+  // to change. Do not implement custom kernels using resource & variant types
+  // now.
+  RESOURCE = 13,
+  VARIANT = 14,
+  UINT32 = 15,
+}
+
+// Custom quantization parameters for experimenting with new quantization
+// techniques.
+table CustomQuantization {
+  custom:[ubyte] (force_align: 16);
+}
+
+// Represents a specific quantization technique's parameters.
+union QuantizationDetails {
+  CustomQuantization,
+}
+
+// Parameters for converting a quantized tensor back to float.
+table QuantizationParameters {
+  // These four parameters are the asymmetric linear quantization parameters.
+  // Given a quantized value q, the corresponding float value f should be:
+  //   f = scale * (q - zero_point)
+  // For other quantization types, the QuantizationDetails below is used.
+  min:[float];  // For importing back into tensorflow.
+  max:[float];  // For importing back into tensorflow.
+  scale:[float];  // For dequantizing the tensor's values.
+  zero_point:[long];
+
+  // If this is not none, the other quantization parameters (i.e. min, max,
+  // scale, zero_point fields above) are ignored and the value of the
+  // QuantizationDetails union should be used.
+  details:QuantizationDetails;
+
+  // Specifies the dimension of the Tensor's shape that the scales and
+  // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1]
+  // with quantization params:
+  //   scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1
+  // will be quantized across the second dimension of t.
+  //   t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1
+  //   t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2
+  //   t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3
+  quantized_dimension:int;
+}
+
+// Sparse tensors.
+// We use a modification of the TACO format.
+// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf
+//
+// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1),
+// potentially with a k-dimensional block (0 <= k <= n) with dims
+// (dn, ..., dn+k-1), the format needs to specify:
+//   1. In what order to traverse these dimensions. For example, to store a 2-D
+//      matrix in row major order, the traversal order would be (d0, d1),
+//      whereas to store it in column major order, the traversal order would be
+//      (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order
+//      could be (d0, d1, d2, d3).
+//   2. How each block dimension in (dn, ..., dn+k-1) maps to the original
+//      tensor dimension in (d0, ..., dn-1).
+//   3. In the traversal order defined above, the format (dense vs. sparse) and
+//      index metadata for each dimension. For a dense dimension, this is just
+//      the size of that dimension. For a sparse dimension, it's the same as
+//      the compressed index defined in the Compressed Sparse Row (CSR) format.
+//      (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html)
+
+// The storage type for a dimension. Currently we support:
+//   1. DENSE: each coordinate in this dimension is stored implicitly.
+//   2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The
+//      compression technique is the same what CSR uses.
+// More types like a sparse dimension with a different compression technique
+// could be added to the list in the future.
+enum DimensionType : byte {
+  DENSE = 0,
+  SPARSE_CSR = 1,
+}
+
+table Int32Vector {
+  values:[int];
+}
+
+table Uint16Vector {
+  values:[ushort] (force_align: 4);
+}
+
+table Uint8Vector {
+  values:[ubyte] (force_align: 4);
+}
+
+// Variable-typed buffer to store the index metadata for a sparse dimension.
+// The widest type is Int32 instead of UInt32 because tensor's shape is a int32
+// vector. We don't want the per-dimensional index to overflow that range.
+union SparseIndexVector {
+  Int32Vector,
+  Uint16Vector,
+  Uint8Vector
+}
+
+table DimensionMetadata {
+  // Whether a dimension is dense or sparse.
+  format:DimensionType;
+  // Index metadata used for a dimension.
+  //   - If format is DimensionType.DENSE then we use the dense_size field to
+  //     store the size of that dimension. Each index in that dimension is
+  //     stored implicitly.
+  //   - If format is DimensionType.SPARSE_CSR then we use array_segments and
+  //     array_indices to encode that dimension. array_segments represents how
+  //     to segment the indices array, each segment corresponds to one element
+  //     in the previous dimension. array_indices represents the index of the
+  //     non-zero elements within this dimension (as those in the CSR matrix
+  //     format, where the first array is row pointers and the second array is
+  //     column indices).
+  dense_size:int;
+  array_segments:SparseIndexVector;
+  array_indices:SparseIndexVector;
+}
+
+// Parameters to encode a sparse TfLite tensor.
+table SparsityParameters {
+  // The traversal order of the dimensions defined in the `shape` field of the
+  // conceptual dense tensor. For a n-dimensional tensors with dims (d0, d1,
+  // ..., dn-1),
+  //   - if not block sparse, the traversal_order is just a permutation of (d0,
+  //     ..., dn-1). For example, a 2-D matrix stored in row-major order would
+  //     have traversal_order = (d0, d1).
+  //   - if block sparse with a k-dimensional block (0 <= k <= n), the
+  //     traversal_order has n + k elements. The first n elements are still a
+  //     permutation of (d0, ..., dn-1). The lask k elements are a permutation
+  //     of (dn, ..., dn+k-1), defining how to traverse a block internally. For
+  //     example, a 2-D matrix with 2-D blocks, both stored in row-major order
+  //     would have traversal_order = (d0, d1, d2, d3).
+  traversal_order:[int];
+  // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n),
+  // stores how a block dimension in (dn, ..., dn+k-1) maps to the original
+  // tensor dimension in (d0, ..., dn).
+  // It's stored in the order of (dn, ..., dn+k-1).
+  // If not block-sparse, this field is NULL.
+  block_map:[int];
+  // In the traversal order defined above, the metadata needed for
+  // each dimension to locate the non-zero values in the original dense tensor.
+  // The size of the dim_metadata array = the size of the traversal_order array
+  // = n + k.
+  dim_metadata:[DimensionMetadata];
+}
+
+table Tensor {
+  // The tensor shape. The meaning of each entry is operator-specific but
+  // builtin ops use: [batch size, height, width, number of channels] (That's
+  // Tensorflow's NHWC).
+  shape:[int];
+  type:TensorType;
+  // An index that refers to the buffers table at the root of the model. Or,
+  // if there is no data buffer associated (i.e. intermediate results), then
+  // this is 0 (which refers to an always existent empty buffer).
+  //
+  // The data_buffer itself is an opaque container, with the assumption that the
+  // target device is little-endian. In addition, all builtin operators assume
+  // the memory is ordered such that if `shape` is [4, 3, 2], then index
+  // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k].
+  buffer:uint;
+  name:string;  // For debugging and importing back into tensorflow.
+  quantization:QuantizationParameters;  // Optional.
+
+  is_variable:bool = false;
+
+  // Parameters to encode a sparse tensor. See the example in
+  // tensorflow/lite/testdata/sparse_tensor.json.
+  sparsity:SparsityParameters;  // Optional.
+
+  // Encodes `shape` with unknown dimensions. Unknown dimensions are
+  // represented with -1.
+  shape_signature:[int]; // Optional.
+}
+
+// A list of builtin operators. Builtin operators are slightly faster than custom
+// ones, but not by much. Moreover, while custom operators accept an opaque
+// object containing configuration parameters, builtins have a predetermined
+// set of acceptable options.
+// LINT.IfChange
+enum BuiltinOperator : int32 {
+  ADD = 0,
+  AVERAGE_POOL_2D = 1,
+  CONCATENATION = 2,
+  CONV_2D = 3,
+  DEPTHWISE_CONV_2D = 4,
+  DEPTH_TO_SPACE = 5,
+  DEQUANTIZE = 6,
+  EMBEDDING_LOOKUP = 7,
+  FLOOR = 8,
+  FULLY_CONNECTED = 9,
+  HASHTABLE_LOOKUP = 10,
+  L2_NORMALIZATION = 11,
+  L2_POOL_2D = 12,
+  LOCAL_RESPONSE_NORMALIZATION = 13,
+  LOGISTIC = 14,
+  LSH_PROJECTION = 15,
+  LSTM = 16,
+  MAX_POOL_2D = 17,
+  MUL = 18,
+  RELU = 19,
+  // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed
+  // since different model developers use RELU1 in different ways. Never
+  // create another op called RELU1.
+  RELU_N1_TO_1 = 20,
+  RELU6 = 21,
+  RESHAPE = 22,
+  RESIZE_BILINEAR = 23,
+  RNN = 24,
+  SOFTMAX = 25,
+  SPACE_TO_DEPTH = 26,
+  SVDF = 27,
+  TANH = 28,
+  CONCAT_EMBEDDINGS = 29,
+  SKIP_GRAM = 30,
+  CALL = 31,
+  CUSTOM = 32,
+  EMBEDDING_LOOKUP_SPARSE = 33,
+  PAD = 34,
+  UNIDIRECTIONAL_SEQUENCE_RNN = 35,
+  GATHER = 36,
+  BATCH_TO_SPACE_ND = 37,
+  SPACE_TO_BATCH_ND = 38,
+  TRANSPOSE = 39,
+  MEAN = 40,
+  SUB = 41,
+  DIV = 42,
+  SQUEEZE = 43,
+  UNIDIRECTIONAL_SEQUENCE_LSTM = 44,
+  STRIDED_SLICE = 45,
+  BIDIRECTIONAL_SEQUENCE_RNN = 46,
+  EXP = 47,
+  TOPK_V2 = 48,
+  SPLIT = 49,
+  LOG_SOFTMAX = 50,
+  // DELEGATE is a special op type for the operations which are delegated to
+  // other backends.
+  // WARNING: Experimental interface, subject to change
+  DELEGATE = 51,
+  BIDIRECTIONAL_SEQUENCE_LSTM = 52,
+  CAST = 53,
+  PRELU = 54,
+  MAXIMUM = 55,
+  ARG_MAX = 56,
+  MINIMUM = 57,
+  LESS = 58,
+  NEG = 59,
+  PADV2 = 60,
+  GREATER = 61,
+  GREATER_EQUAL = 62,
+  LESS_EQUAL = 63,
+  SELECT = 64,
+  SLICE = 65,
+  SIN = 66,
+  TRANSPOSE_CONV = 67,
+  SPARSE_TO_DENSE = 68,
+  TILE = 69,
+  EXPAND_DIMS = 70,
+  EQUAL = 71,
+  NOT_EQUAL = 72,
+  LOG = 73,
+  SUM = 74,
+  SQRT = 75,
+  RSQRT = 76,
+  SHAPE = 77,
+  POW = 78,
+  ARG_MIN = 79,
+  FAKE_QUANT = 80,
+  REDUCE_PROD = 81,
+  REDUCE_MAX = 82,
+  PACK = 83,
+  LOGICAL_OR = 84,
+  ONE_HOT = 85,
+  LOGICAL_AND = 86,
+  LOGICAL_NOT = 87,
+  UNPACK = 88,
+  REDUCE_MIN = 89,
+  FLOOR_DIV = 90,
+  REDUCE_ANY = 91,
+  SQUARE = 92,
+  ZEROS_LIKE = 93,
+  FILL = 94,
+  FLOOR_MOD = 95,
+  RANGE = 96,
+  RESIZE_NEAREST_NEIGHBOR = 97,
+  LEAKY_RELU = 98,
+  SQUARED_DIFFERENCE = 99,
+  MIRROR_PAD = 100,
+  ABS = 101,
+  SPLIT_V = 102,
+  UNIQUE = 103,
+  CEIL = 104,
+  REVERSE_V2 = 105,
+  ADD_N = 106,
+  GATHER_ND = 107,
+  COS = 108,
+  WHERE = 109,
+  RANK = 110,
+  ELU = 111,
+  REVERSE_SEQUENCE = 112,
+  MATRIX_DIAG = 113,
+  QUANTIZE = 114,
+  MATRIX_SET_DIAG = 115,
+  ROUND = 116,
+  HARD_SWISH = 117,
+  IF = 118,
+  WHILE = 119,
+  NON_MAX_SUPPRESSION_V4 = 120,
+  NON_MAX_SUPPRESSION_V5 = 121,
+  SCATTER_ND = 122,
+  SELECT_V2 = 123,
+  DENSIFY = 124,
+  SEGMENT_SUM = 125,
+  BATCH_MATMUL = 126,
+  PLACEHOLDER_FOR_GREATER_OP_CODES = 127,
+  CUMSUM = 128,
+  CALL_ONCE = 129,
+  BROADCAST_TO = 130,
+  RFFT2D = 131,
+  CONV_3D = 132,
+  IMAG=133,
+  REAL=134,
+  COMPLEX_ABS=135,
+  HASHTABLE = 136,
+  HASHTABLE_FIND = 137,
+  HASHTABLE_IMPORT = 138,
+  HASHTABLE_SIZE = 139,
+  REDUCE_ALL = 140,
+  CONV_3D_TRANSPOSE = 141,
+  VAR_HANDLE = 142,
+  READ_VARIABLE = 143,
+  ASSIGN_VARIABLE = 144,
+}
+// LINT.ThenChange(nnapi_linter/linter.proto)
+
+// Options for the builtin operators.
+union BuiltinOptions {
+  Conv2DOptions,
+  DepthwiseConv2DOptions,
+  ConcatEmbeddingsOptions,
+  LSHProjectionOptions,
+  Pool2DOptions,
+  SVDFOptions,
+  RNNOptions,
+  FullyConnectedOptions,
+  SoftmaxOptions,
+  ConcatenationOptions,
+  AddOptions,
+  L2NormOptions,
+  LocalResponseNormalizationOptions,
+  LSTMOptions,
+  ResizeBilinearOptions,
+  CallOptions,
+  ReshapeOptions,
+  SkipGramOptions,
+  SpaceToDepthOptions,
+  EmbeddingLookupSparseOptions,
+  MulOptions,
+  PadOptions,
+  GatherOptions,
+  BatchToSpaceNDOptions,
+  SpaceToBatchNDOptions,
+  TransposeOptions,
+  ReducerOptions,
+  SubOptions,
+  DivOptions,
+  SqueezeOptions,
+  SequenceRNNOptions,
+  StridedSliceOptions,
+  ExpOptions,
+  TopKV2Options,
+  SplitOptions,
+  LogSoftmaxOptions,
+  CastOptions,
+  DequantizeOptions,
+  MaximumMinimumOptions,
+  ArgMaxOptions,
+  LessOptions,
+  NegOptions,
+  PadV2Options,
+  GreaterOptions,
+  GreaterEqualOptions,
+  LessEqualOptions,
+  SelectOptions,
+  SliceOptions,
+  TransposeConvOptions,
+  SparseToDenseOptions,
+  TileOptions,
+  ExpandDimsOptions,
+  EqualOptions,
+  NotEqualOptions,
+  ShapeOptions,
+  PowOptions,
+  ArgMinOptions,
+  FakeQuantOptions,
+  PackOptions,
+  LogicalOrOptions,
+  OneHotOptions,
+  LogicalAndOptions,
+  LogicalNotOptions,
+  UnpackOptions,
+  FloorDivOptions,
+  SquareOptions,
+  ZerosLikeOptions,
+  FillOptions,
+  BidirectionalSequenceLSTMOptions,
+  BidirectionalSequenceRNNOptions,
+  UnidirectionalSequenceLSTMOptions,
+  FloorModOptions,
+  RangeOptions,
+  ResizeNearestNeighborOptions,
+  LeakyReluOptions,
+  SquaredDifferenceOptions,
+  MirrorPadOptions,
+  AbsOptions,
+  SplitVOptions,
+  UniqueOptions,
+  ReverseV2Options,
+  AddNOptions,
+  GatherNdOptions,
+  CosOptions,
+  WhereOptions,
+  RankOptions,
+  ReverseSequenceOptions,
+  MatrixDiagOptions,
+  QuantizeOptions,
+  MatrixSetDiagOptions,
+  HardSwishOptions,
+  IfOptions,
+  WhileOptions,
+  DepthToSpaceOptions,
+  NonMaxSuppressionV4Options,
+  NonMaxSuppressionV5Options,
+  ScatterNdOptions,
+  SelectV2Options,
+  DensifyOptions,
+  SegmentSumOptions,
+  BatchMatMulOptions,
+  CumsumOptions,
+  CallOnceOptions,
+  BroadcastToOptions,
+  Rfft2dOptions,
+  Conv3DOptions,
+  HashtableOptions,
+  HashtableFindOptions,
+  HashtableImportOptions,
+  HashtableSizeOptions,
+  VarHandleOptions,
+  ReadVariableOptions,
+  AssignVariableOptions,
+}
+
+enum Padding : byte { SAME, VALID }
+
+enum ActivationFunctionType : byte {
+  NONE = 0,
+  RELU = 1,
+  RELU_N1_TO_1 = 2,
+  RELU6 = 3,
+  TANH = 4,
+  SIGN_BIT = 5,
+}
+
+table Conv2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+// Options for both Conv3D and Conv3DTranspose.
+table Conv3DOptions {
+  padding:Padding;
+  stride_d:int;
+  stride_w:int;
+  stride_h:int;
+  fused_activation_function:ActivationFunctionType;
+  dilation_d_factor:int = 1;
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table Pool2DOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  filter_width:int;
+  filter_height:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table DepthwiseConv2DOptions {
+  // Parameters for DepthwiseConv version 1 or above.
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+  // `depth_multiplier` is redundant. It's used by CPU kernels in
+  // TensorFlow 2.0 or below, but ignored in versions above.
+  // See comments in lite/c/builtin_op_data.h for more details.
+  depth_multiplier:int;
+  fused_activation_function:ActivationFunctionType;
+  // Parameters for DepthwiseConv version 2 or above.
+  dilation_w_factor:int = 1;
+  dilation_h_factor:int = 1;
+}
+
+table ConcatEmbeddingsOptions {
+  num_channels:int;
+  num_columns_per_channel:[int];
+  embedding_dim_per_channel:[int]; // This could be inferred from parameters.
+}
+
+enum LSHProjectionType: byte {
+  UNKNOWN = 0,
+  SPARSE = 1,
+  DENSE = 2,
+}
+
+table LSHProjectionOptions {
+  type: LSHProjectionType;
+}
+
+table SVDFOptions {
+  rank:int;
+  fused_activation_function:ActivationFunctionType;
+  // For weights-only quantization, use asymmetric quantization for non
+  // constant inputs at evaluation time.
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow RNNCell.
+table RNNOptions {
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with RNNCell.
+table SequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  asymmetric_quantize_inputs:bool;
+}
+
+// An implementation of TensorFlow bidrectional_dynamic_rnn with RNNCell.
+table BidirectionalSequenceRNNOptions {
+  time_major:bool;
+  fused_activation_function:ActivationFunctionType;
+  merge_outputs: bool;
+  asymmetric_quantize_inputs:bool;
+}
+
+enum FullyConnectedOptionsWeightsFormat: byte {
+  DEFAULT = 0,
+  SHUFFLED4x16INT8 = 1,
+}
+
+// An implementation of TensorFlow fully_connected (a.k.a Dense) layer.
+table FullyConnectedOptions {
+  // Parameters for FullyConnected version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+
+  // Parameters for FullyConnected version 2 or above.
+  weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT;
+
+  // Parameters for FullyConnected version 5 or above.
+  // If set to true, then the number of dimension is preserved. Furthermore,
+  // all but the last dimension of the input and output shapes will be equal.
+  keep_num_dims: bool;
+
+  // Parameters for FullyConnected version 7 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table SoftmaxOptions {
+  beta: float;
+}
+
+// An implementation of TensorFlow concat.
+table ConcatenationOptions {
+  axis:int;
+  fused_activation_function:ActivationFunctionType;
+}
+
+table AddOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 3.
+  pot_scale_int16:bool = true;
+}
+
+table MulOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table L2NormOptions {
+  // This field is currently ignored in the L2 Norm Op.
+  fused_activation_function:ActivationFunctionType;
+}
+
+table LocalResponseNormalizationOptions {
+  radius:int;
+  bias:float;
+  alpha:float;
+  beta:float;
+}
+
+enum LSTMKernelType : byte {
+  // Full LSTM kernel which supports peephole and projection.
+  FULL = 0,
+  // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell.
+  BASIC = 1,
+}
+
+// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell
+table LSTMOptions {
+  // Parameters for LSTM version 1 or above.
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // Parameters for LSTM version 2 or above.
+  // Basic kernel is only supported in version 2 or above.
+  kernel_type: LSTMKernelType = FULL;
+
+  // Parameters for LSTM version 4 or above.
+  asymmetric_quantize_inputs: bool;
+}
+
+// An implementation of TensorFlow dynamic_rnn with LSTMCell.
+table UnidirectionalSequenceLSTMOptions {
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true then first dimension is sequence, otherwise batch.
+  time_major:bool;
+
+  // Parameter for Unidirectional Sequence LSTM version 4.
+  asymmetric_quantize_inputs:bool;
+}
+
+table BidirectionalSequenceLSTMOptions {
+  // Parameters supported by version 1:
+  fused_activation_function:ActivationFunctionType;
+  cell_clip: float; // Optional, 0.0 means no clipping
+  proj_clip: float; // Optional, 0.0 means no clipping
+
+  // If true, store the outputs of both directions into the first output.
+  merge_outputs: bool;
+
+  // Parameters supported by version 2:
+  // If true then first dimension is sequence, otherwise batch.
+  // Version 1 implementations assumed time_major to be true, so this default
+  // value should never change.
+  time_major: bool = true;
+
+  // Parameters for version 3 or above.
+  asymmetric_quantize_inputs:bool;
+}
+
+table ResizeBilinearOptions {
+  new_height: int (deprecated);
+  new_width: int (deprecated);
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+table ResizeNearestNeighborOptions {
+  align_corners: bool;
+  half_pixel_centers: bool;
+}
+
+// A call operation options
+table CallOptions {
+  // The subgraph index that needs to be called.
+  subgraph:uint;
+}
+
+table PadOptions {
+}
+
+table PadV2Options {
+}
+
+table ReshapeOptions {
+  new_shape:[int];
+}
+
+table SpaceToBatchNDOptions {
+}
+
+table BatchToSpaceNDOptions {
+}
+
+table SkipGramOptions {
+  ngram_size: int;
+  max_skip_size: int;
+  include_all_ngrams: bool;
+}
+
+table SpaceToDepthOptions {
+  block_size: int;
+}
+
+table DepthToSpaceOptions {
+  block_size: int;
+}
+
+table SubOptions {
+  fused_activation_function:ActivationFunctionType;
+  // Parameters supported by version 5
+  pot_scale_int16:bool = true;
+}
+
+table DivOptions {
+  fused_activation_function:ActivationFunctionType;
+}
+
+table TopKV2Options {
+}
+
+enum CombinerType : byte {
+  SUM = 0,
+  MEAN = 1,
+  SQRTN = 2,
+}
+
+table EmbeddingLookupSparseOptions {
+  combiner:CombinerType;
+}
+
+table GatherOptions {
+  axis: int;
+  // Parameters for Gather version 5 or above.
+  batch_dims: int = 0;
+}
+
+table TransposeOptions {
+}
+
+table ExpOptions {
+}
+
+table CosOptions {
+}
+
+table ReducerOptions {
+  keep_dims: bool;
+}
+
+table SqueezeOptions {
+  squeeze_dims:[int];
+}
+
+table SplitOptions {
+  num_splits: int;
+}
+
+table SplitVOptions {
+  num_splits: int;
+}
+
+table StridedSliceOptions {
+  begin_mask: int;
+  end_mask: int;
+  ellipsis_mask: int;
+  new_axis_mask: int;
+  shrink_axis_mask: int;
+}
+
+table LogSoftmaxOptions {
+}
+
+table CastOptions {
+  in_data_type: TensorType;
+  out_data_type: TensorType;
+}
+
+table DequantizeOptions {
+}
+
+table MaximumMinimumOptions {
+}
+
+table TileOptions {
+}
+
+table ArgMaxOptions {
+  output_type : TensorType;
+}
+
+table ArgMinOptions {
+  output_type : TensorType;
+}
+
+table GreaterOptions {
+}
+
+table GreaterEqualOptions {
+}
+
+table LessOptions {
+}
+
+table LessEqualOptions {
+}
+
+table NegOptions {
+}
+
+table SelectOptions {
+}
+
+table SliceOptions {
+}
+
+table TransposeConvOptions {
+  padding:Padding;
+  stride_w:int;
+  stride_h:int;
+}
+
+table ExpandDimsOptions {
+}
+
+table SparseToDenseOptions {
+  validate_indices:bool;
+}
+
+table EqualOptions {
+}
+
+table NotEqualOptions {
+}
+
+table ShapeOptions {
+  // Optional output type of the operation (int32 or int64). Defaults to int32.
+  out_type : TensorType;
+}
+
+table RankOptions {
+}
+
+table PowOptions {
+}
+
+table FakeQuantOptions {
+  // Parameters supported by version 1:
+  min:float;
+  max:float;
+  num_bits:int;
+
+  // Parameters supported by version 2:
+  narrow_range:bool;
+}
+
+table PackOptions {
+  values_count:int;
+  axis:int;
+}
+
+table LogicalOrOptions {
+}
+
+table OneHotOptions {
+  axis:int;
+}
+
+table AbsOptions {
+}
+
+
+table HardSwishOptions {
+}
+
+table LogicalAndOptions {
+}
+
+table LogicalNotOptions {
+}
+
+table UnpackOptions {
+  num:int;
+  axis:int;
+}
+
+table FloorDivOptions {
+}
+
+table SquareOptions {
+}
+
+table ZerosLikeOptions {
+}
+
+table FillOptions {
+}
+
+table FloorModOptions {
+}
+
+table RangeOptions {
+}
+
+table LeakyReluOptions {
+  alpha:float;
+}
+
+table SquaredDifferenceOptions {
+}
+
+enum MirrorPadMode : byte {
+  // Doesn't include borders.
+  REFLECT = 0,
+  // Includes borders.
+  SYMMETRIC = 1,
+}
+
+table MirrorPadOptions {
+  mode:MirrorPadMode;
+}
+
+table UniqueOptions {
+  idx_out_type:TensorType = INT32;
+}
+
+table ReverseV2Options {
+}
+
+table AddNOptions {
+}
+
+table GatherNdOptions {
+}
+
+table WhereOptions {
+}
+
+table ReverseSequenceOptions {
+  seq_dim:int;
+  batch_dim:int = 0;
+}
+
+table MatrixDiagOptions {
+}
+
+table QuantizeOptions {
+}
+
+table MatrixSetDiagOptions {
+}
+
+table IfOptions {
+  then_subgraph_index:int;
+  else_subgraph_index:int;
+}
+
+table CallOnceOptions {
+  init_subgraph_index:int;
+}
+
+table WhileOptions {
+  cond_subgraph_index:int;
+  body_subgraph_index:int;
+}
+
+table NonMaxSuppressionV4Options {
+}
+
+table NonMaxSuppressionV5Options {
+}
+
+table ScatterNdOptions {
+}
+
+table SelectV2Options {
+}
+
+table DensifyOptions {
+}
+
+table SegmentSumOptions {
+}
+
+table BatchMatMulOptions {
+  adj_x:bool;
+  adj_y:bool;
+  // Parameters for BatchMatMul version 4 or above.
+  // If set to true, then weights-only op will use asymmetric quantization for
+  // inputs.
+  asymmetric_quantize_inputs: bool;
+}
+
+table CumsumOptions {
+  exclusive:bool;
+  reverse:bool;
+}
+
+table BroadcastToOptions {
+}
+
+table Rfft2dOptions {
+}
+
+table HashtableOptions {
+  // The identity of hash tables. This identity will be used across different
+  // subgraphs in the same interpreter instance.
+  table_id:int;
+  key_dtype:TensorType;
+  value_dtype:TensorType;
+}
+
+table HashtableFindOptions {
+}
+
+table HashtableImportOptions {
+}
+
+table HashtableSizeOptions {
+}
+
+table VarHandleOptions {
+  container:string;
+  shared_name:string;
+}
+
+table ReadVariableOptions {
+}
+
+table AssignVariableOptions {
+}
+
+// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a
+// builtin, or a string if the operator is custom.
+table OperatorCode {
+  // This field is for backward compatibility. This field will be used when
+  // the value of the extended builtin_code field has less than
+  // BulitinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  deprecated_builtin_code:byte;
+  custom_code:string;
+
+  // The version of the operator. The version need to be bumped whenever new
+  // parameters are introduced into an op.
+  version:int = 1;
+
+  // This field is introduced for resolving op builtin code shortage problem
+  // (the original BuiltinOperator enum field was represented as a byte).
+  // This field will be used when the value of the extended builtin_code field
+  // has greater than BulitinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES.
+  builtin_code:BuiltinOperator;
+}
+
+enum CustomOptionsFormat : byte {
+  FLEXBUFFERS = 0,
+}
+
+// An operator takes tensors as inputs and outputs. The type of operation being
+// performed is determined by an index into the list of valid OperatorCodes,
+// while the specifics of each operations is configured using builtin_options
+// or custom_options.
+table Operator {
+  // Index into the operator_codes array. Using an integer here avoids
+  // complicate map lookups.
+  opcode_index:uint;
+
+  // Optional input are indicated by -1.
+  inputs:[int];
+  outputs:[int];
+
+  builtin_options:BuiltinOptions;
+  custom_options:[ubyte];
+  custom_options_format:CustomOptionsFormat;
+
+  // A list of booleans indicating the input tensors which are being mutated by
+  // this operator.(e.g. used by RNN and LSTM).
+  // For example, if the "inputs" array refers to 5 tensors and the second and
+  // fifth are mutable variables, then this list will contain
+  // [false, true, false, false, true].
+  //
+  // If the list is empty, no variable is mutated in this operator.
+  // The list either has the same length as `inputs`, or is empty.
+  mutating_variable_inputs:[bool];
+
+  // A list of indices to the subgraph's "tensors" that are internal to an Op.
+  // Internal tensors are those that do not flow in or out of the operation,
+  // but instead are part of internal computation. As such, the operation's
+  // implementation may manage its memory more efficiently. They are needed
+  // however (i.e. not just an implementation detail) since they are part of the
+  // computation, which may require relevant metadata such as quantization
+  // parameters.
+  intermediates:[int];
+}
+
+// The root type, defining a subgraph, which typically represents an entire
+// model.
+table SubGraph {
+  // A list of all tensors used in this subgraph.
+  tensors:[Tensor];
+
+  // Indices of the tensors that are inputs into this subgraph. Note this is
+  // the list of non-static tensors that feed into the subgraph for inference.
+  inputs:[int];
+
+  // Indices of the tensors that are outputs out of this subgraph. Note this is
+  // the list of output tensors that are considered the product of the
+  // subgraph's inference.
+  outputs:[int];
+
+  // All operators, in execution order.
+  operators:[Operator];
+
+  // Name of this subgraph (used for debugging).
+  name:string;
+}
+
+// Table of raw data buffers (used for constant tensors). Referenced by tensors
+// by index. The generous alignment accommodates mmap-friendly data structures.
+table Buffer {
+  data:[ubyte] (force_align: 16);
+}
+
+table Metadata {
+  // A human readable string to uniquely identify a Metadata.
+  name:string;
+  // An index to the buffers table.
+  buffer:uint;
+}
+
+// Map from an alias name of tensor to tensor index in the graph.
+// This is used in Signature def.
+table TensorMap {
+  // Represents the alias to use for this tensor.
+  name:string;
+
+  // The actual tensor index in the primary graph, that 'name' corresponds to.
+  tensor_index:uint;
+}
+
+// This corresponds to SignatureDef in Tensorflow SavedModel.
+// The SignatureDef will be part of the SavedModel provided for conversion.
+table SignatureDef {
+  // Named inputs for this signature.
+  inputs:[TensorMap];
+
+  // Named outputs for this signature.
+  outputs:[TensorMap];
+
+  // Exported method name for this signature.
+  method_name:string;
+
+  // Key value which was in the Tensorflow SavedModel SignatureDef map.
+  key:string;
+
+  // Subgraph index of the exported method.
+  subgraph_index:uint;
+}
+
+table Model {
+  // Version of the schema.
+  version:uint;
+
+  // A list of all operator codes used in this model. This is
+  // kept in order because operators carry an index into this
+  // vector.
+  operator_codes:[OperatorCode];
+
+  // All the subgraphs of the model. The 0th is assumed to be the main
+  // model.
+  subgraphs:[SubGraph];
+
+  // A description of the model.
+  description:string;
+
+  // Buffers of the model.
+  // Note the 0th entry of this array must be an empty buffer (sentinel).
+  // This is a convention so that tensors without a buffer can provide 0 as
+  // their buffer.
+  buffers:[Buffer];
+
+  // Metadata about the model. Indirects into the existings buffers list.
+  // Deprecated, prefer to use metadata field.
+  metadata_buffer:[int];
+
+  // Metadata about the model.
+  metadata:[Metadata];
+
+  // Optional SignatureDefs for the model.
+  signature_defs:[SignatureDef];
+}
+
+root_type Model;
index 73dfacd..609ef4b 100644 (file)
@@ -6,3 +6,4 @@ VERSION,URL
 2.2.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.2.0/tensorflow/lite/schema/schema.fbs
 2.3.0-rc0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0-rc0/tensorflow/lite/schema/schema.fbs
 2.3.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.3.0/tensorflow/lite/schema/schema.fbs
+2.6.0,https://raw.githubusercontent.com/tensorflow/tensorflow/v2.6.0/tensorflow/lite/schema/schema.fbs
index 2e3955c..b432929 100644 (file)
@@ -8,7 +8,7 @@ android {
         minSdkVersion 26
         targetSdkVersion 29
         versionCode 1
-        versionName "1.17.0"
+        versionName "1.18.0"
 
         externalNativeBuild {
             ndkBuild {
diff --git a/runtime/libs/ndarray/CMakeLists.txt b/runtime/libs/ndarray/CMakeLists.txt
new file mode 100644 (file)
index 0000000..f88f131
--- /dev/null
@@ -0,0 +1,23 @@
# Static library holding the ndarray view types (Array / ContiguousSpan).
add_library(ndarray STATIC src/Array.cpp src/ContiguousSpan.cpp)

# PIC so the static archive can be folded into shared backend libraries.
set_target_properties(ndarray PROPERTIES POSITION_INDEPENDENT_CODE ON)

target_include_directories(ndarray PUBLIC include)
#can't make this private because of c++ templates
target_include_directories(ndarray PUBLIC src)

# When ON, the headers skip their `extern template` declarations and every TU
# instantiates the templates itself (defaults to OFF).
option(NDARRAY_INLINE_TEMPLATES "Set to ON to disable extern declarations for common types")

if(${NDARRAY_INLINE_TEMPLATES})
    target_compile_definitions(ndarray PUBLIC -DNDARRAY_INLINE_TEMPLATES=1)
endif()

target_link_libraries(ndarray PRIVATE nnfw_common)
target_link_libraries(ndarray PRIVATE nnfw_coverage)

# Tests and examples are only built when testing is enabled.
if(NOT ENABLE_TEST)
  return()
endif(NOT ENABLE_TEST)

add_subdirectory(test)
add_subdirectory(example)
diff --git a/runtime/libs/ndarray/example/CMakeLists.txt b/runtime/libs/ndarray/example/CMakeLists.txt
new file mode 100644 (file)
index 0000000..c4b575d
--- /dev/null
@@ -0,0 +1,4 @@
# Raw-pointer gather demo; intentionally does not link the ndarray library.
add_executable(example_no_array example_no_array.cpp)

# Same gather expressed with ndarray views; links against the library.
add_executable(example_array example_array.cpp)
target_link_libraries(example_array PRIVATE ndarray)
diff --git a/runtime/libs/ndarray/example/example_array.cpp b/runtime/libs/ndarray/example/example_array.cpp
new file mode 100644 (file)
index 0000000..85d2746
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
+#include <iostream>
+#include <iterator>
+
+using namespace ndarray;
+
+void gather_array(const Array<float> &input, Array<float> &output, const Array<int> &indices)
+{
+  assert(indices.shape().rank() == 3);
+  assert(input.shape().rank() == 3);
+  assert(indices.shape().dim(1) == input.shape().rank());
+
+  for (size_t i = 0; i < indices.shape().dim(0); ++i)
+  {
+    for (size_t j = 0; j < indices.shape().dim(1); ++j)
+    {
+      auto index = indices.slice(i, j);
+      output.slice(i, j).assign(input.slice(index[0], index[1]));
+    }
+  }
+}
+
+int main()
+{
+  // fill tensor of shape[3,3,4] with sequential numbers from [0..36)
+  Shape in_shape{3, 3, 4};
+  std::vector<float> input_data(in_shape.element_count());
+  for (size_t i = 0; i < in_shape.element_count(); ++i)
+    input_data[i] = i;
+
+  Array<float> input(input_data.data(), in_shape);
+
+  // select column-vectors on main diagonal
+  Shape indices_shape{1, 3, 2};
+  std::vector<int> indices_data(indices_shape.element_count());
+  Array<int> indices(indices_data.data(), indices_shape);
+
+  indices.slice(0, 0) = {0, 0};
+  indices.slice(0, 1) = {1, 1};
+  indices.slice(0, 2) = {2, 2};
+
+  Shape output_shape{1, 3, 4};
+  std::vector<float> output_data(output_shape.element_count());
+
+  Array<float> output(output_data.data(), output_shape);
+
+  gather_array(input, output, indices);
+
+  for (size_t i = 0; i < indices_shape.dim(0); ++i)
+  {
+    for (size_t j = 0; j < indices_shape.dim(1); ++j)
+    {
+      auto output_piece = output.slice(i, j);
+      std::ostream_iterator<int> cout_it(std::cout, ", ");
+      std::copy(output_piece.begin(), output_piece.end(), cout_it);
+      std::cout << std::endl;
+    }
+  }
+}
diff --git a/runtime/libs/ndarray/example/example_no_array.cpp b/runtime/libs/ndarray/example/example_no_array.cpp
new file mode 100644 (file)
index 0000000..3a4d05d
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <array>
+#include <vector>
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+
// Reference gather over 3-D row-major data using raw pointers only (the
// "before" picture for the ndarray-based example).
//
// `indices` is an [n, m, 2] tensor of (row, col) pairs; for each pair the
// dims[2]-long innermost vector of `in_data` at that position is copied to
// the corresponding (i, j) position of `out_data`.
void gather_no_array(const float *in_data, const std::array<size_t, 3> &dims, float *out_data,
                     const std::array<size_t, 3> &out_dims,
                     const int *indices, const std::array<size_t, 3> &indices_dims)
{
  assert(indices_dims[1] == dims.size());
  // Innermost vectors are copied whole, so input/output lengths must agree.
  assert(dims[2] == out_dims[2]);

  // size_t counters: the original used int, which triggers signed/unsigned
  // comparison warnings against the size_t dimension values.
  for (size_t i = 0; i < indices_dims[0]; ++i)
  {
    for (size_t j = 0; j < indices_dims[1]; ++j)
    {
      // (row, col) pair for selection (i, j).
      const int *index_ptr = indices + i * indices_dims[2] * indices_dims[1] + j * indices_dims[2];

      size_t in_offset = index_ptr[0] * dims[2] * dims[1] + index_ptr[1] * dims[2];

      const float *in_ptr = in_data + in_offset;

      size_t out_offset = i * out_dims[2] * out_dims[1] + j * out_dims[2];

      float *out_ptr = out_data + out_offset;

      for (size_t k = 0; k < dims[2]; ++k)
      {
        out_ptr[k] = in_ptr[k];
      }
    }
  }
}
+
+int main()
+{
+  std::array<size_t, 3> in_dims{3, 3, 4};
+  std::vector<float> input(3 * 3 * 4);
+  for (size_t i = 0; i < 3 * 3 * 4; ++i)
+    input[i] = i;
+
+  std::array<size_t, 3> indices_shape{1, 3, 2};
+  std::vector<int> indices(1 * 3 * 2);
+
+  indices[0] = 0;
+  indices[1] = 0;
+  indices[2] = 1;
+  indices[3] = 1;
+  indices[4] = 2;
+  indices[5] = 2;
+
+  std::array<size_t, 3> output_dims{1, 3, 4};
+  std::vector<float> output(1 * 3 * 4);
+
+  gather_no_array(input.data(), in_dims, output.data(), output_dims, indices.data(), indices_shape);
+
+  for (size_t i = 0; i < output_dims[0]; ++i)
+  {
+    for (size_t j = 0; j < output_dims[1]; ++j)
+    {
+      auto out_ptr = output.data() + i * output_dims[1] * output_dims[2] + j * output_dims[2];
+      for (size_t k = 0; k < output_dims[2]; ++k)
+      {
+        std::cout << out_ptr[k] << ", ";
+      }
+      std::cout << std::endl;
+    }
+  }
+}
diff --git a/runtime/libs/ndarray/include/ndarray/Array.h b/runtime/libs/ndarray/include/ndarray/Array.h
new file mode 100644 (file)
index 0000000..09e7917
--- /dev/null
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_ARRAY_H_
+#define _NDARRAY_ARRAY_H_
+
+#include "Common.h"
+
+#include "ContiguousSpan.h"
+#include "Shape.h"
+
+#if __cplusplus < 201402L
+#include "detail/cxx14.h" //integer_sequence and make_index_dequence definitions
+#else
+#include <utility>
+#endif
+
+#include <algorithm>
+#include <cassert>
+#include <type_traits>
+#include <array>
+#include <tuple>
+#include <cstddef>
+
+namespace ndarray
+{
+
+// there is no index_sequence before c++14
+#if __cplusplus < 201402L
+
+template <size_t... Nums> using index_sequence = cxx14::index_sequence<Nums...>;
+
+template <size_t Num> using make_index_sequence = cxx14::make_index_sequence<Num>;
+
+#else
+
+template <size_t... Nums> using index_sequence = std::index_sequence<Nums...>;
+
+template <size_t _Num> using make_index_sequence = std::make_index_sequence<_Num>;
+
+#endif //__cplusplus < 201402L
+
+struct Strides
+{
+  explicit Strides(Shape s) : _strides{} { fillStrides(s); }
+
+  int operator[](size_t idx) const noexcept { return _strides[idx]; }
+
+  // since we don't have c++14 fold expression
+  template <typename Seq, typename... Ts> struct _calc_offset;
+
+  template <size_t Num, size_t... Nums, typename T, typename... Ts>
+  struct _calc_offset<index_sequence<Num, Nums...>, T, Ts...>
+  {
+    static constexpr size_t get(const std::array<int, 8> &strides, int x, Ts... xs)
+    {
+      return _calc_offset<index_sequence<Nums...>, Ts...>::get(strides, xs...) +
+             x * std::get<Num>(strides);
+    }
+  };
+
+  template <size_t Num, typename T> struct _calc_offset<index_sequence<Num>, T>
+  {
+    static constexpr size_t get(const std::array<int, 8> &strides, int x)
+    {
+      return x * std::get<Num>(strides);
+    }
+  };
+
+  template <typename Seq, typename... Ts> constexpr size_t offset(Seq, Ts... x) const noexcept
+  {
+    // return ( 0 + ... + (std::get<Nums>(_strides) * x)); in c++14
+    return _calc_offset<Seq, Ts...>::get(_strides, x...);
+  }
+
+private:
+  void fillStrides(const Shape &s) noexcept
+  {
+    int rank = s.rank();
+    _strides[rank - 1] = 1;
+    for (int d = rank - 2; d >= 0; --d)
+    {
+      _strides[d] = _strides[d + 1] * s.dim(d + 1);
+    }
+  }
+
+  std::array<int, NDARRAY_MAX_DIMENSION_COUNT> _strides;
+};
+
/**
 * @brief Non-owning multi-dimensional (row-major) view over a flat buffer.
 *
 * The Array never owns `data`; the caller keeps the buffer alive for the
 * lifetime of the view. Copying is disabled; moving nulls out the source.
 */
template <typename T> class Array
{
public:
  // `data` must hold at least shape.element_count() elements of T.
  Array(T *data, Shape shape) noexcept : _data(data), _shape(shape), _strides(shape) {}

  Array(const Array &) = delete;

  Array(Array &&a) noexcept : _data(a._data), _shape(a._shape), _strides(a._strides)
  {
    a._data = nullptr;
  }

  // Element access with exactly rank() indices (checked only by assert in _at).
  template <typename... Ts> T &at(Ts... x) const noexcept { return _at(static_cast<size_t>(x)...); }

  /**
   * @brief returns last dimension as ContiguousSpan
   * @param x indices of slice to take (rank() - 1 of them). See tests for usage details
   * @return slice at given position
   */
  template <typename... Ts> ContiguousSpan<T, std::is_const<T>::value> slice(Ts... x) noexcept
  {
    assert(sizeof...(Ts) == _shape.rank() - 1);
    return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)};
  }

  /**
   * @brief returns last dimension as read-only ContiguousSpan
   * @param x indices of slice to take (rank() - 1 of them). See tests for usage details
   * @return slice at given position
   */
  template <typename... Ts> ContiguousSpan<T, true> slice(Ts... x) const noexcept
  {
    assert(sizeof...(Ts) == _shape.rank() - 1);
    return {&at(x..., 0ul), _shape.dim(_shape.rank() - 1)};
  }

  // The whole buffer viewed as a single contiguous span.
  ContiguousSpan<T, std::is_const<T>::value> flat() noexcept
  {
    return {_data, _shape.element_count()};
  }

  ContiguousSpan<T, true> flat() const noexcept { return {_data, _shape.element_count()}; }

  const Shape &shape() const noexcept { return _shape; }

private:
  // Folds the index pack into a flat offset via the precomputed strides.
  template <typename... Ts> T &_at(Ts... x) const noexcept
  {
    assert(sizeof...(x) == _shape.rank());
    using Indices = make_index_sequence<sizeof...(Ts)>;
    return _data[offset(Indices{}, x...)];
  }

  template <typename... Ts, size_t... Nums>
  size_t offset(index_sequence<Nums...> seq, Ts... x) const noexcept
  {
    static_assert(
      sizeof...(Ts) == sizeof...(Nums),
      "Sanity check failed. Generated index sequence size is not equal to argument count");

    return _strides.offset(seq, x...);
  }

  T *_data;     // borrowed buffer, never freed here
  Shape _shape;
  Strides _strides;
};
+
+template <typename To, typename From> Array<To> array_cast(Array<From> &&from, Shape newShape)
+{
+  assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count());
+  return Array<To>(reinterpret_cast<To *>(from.flat().data()), newShape);
+}
+
+template <typename To, typename From>
+Array<const To> array_cast(const Array<From> &from, Shape newShape)
+{
+  assert(from.shape().element_count() / (sizeof(To) / sizeof(From)) == newShape.element_count());
+  return Array<To>(reinterpret_cast<const To *>(from.flat().data()), newShape);
+}
+
+#ifndef NDARRAY_INLINE_TEMPLATES
+
+extern template class Array<float>;
+extern template class Array<int32_t>;
+extern template class Array<uint32_t>;
+extern template class Array<uint8_t>;
+
+#endif // NDARRAY_INLINE_TEMPLATES
+
+} // namespace ndarray
+
+#endif //_NDARRAY_ARRAY_H_
diff --git a/runtime/libs/ndarray/include/ndarray/Common.h b/runtime/libs/ndarray/include/ndarray/Common.h
new file mode 100644 (file)
index 0000000..aa0cc6f
--- /dev/null
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_COMMON_H_
+#define _NDARRAY_COMMON_H_
+
+#define NDARRAY_MAX_DIMENSION_COUNT 8
+
+#endif //_NDARRAY_COMMON_H_
diff --git a/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h b/runtime/libs/ndarray/include/ndarray/ContiguousSpan.h
new file mode 100644 (file)
index 0000000..b322b77
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_CONTIGNIOUS_SPAN_H_
+#define _NDARRAY_CONTIGNIOUS_SPAN_H_
+
+#include <type_traits>
+#include <vector>
+#include <cstdint>
+#include <cstddef>
+#include <cassert>
+
+namespace ndarray
+{
+
/**
 * @brief Non-owning view over a contiguous run of elements of type T.
 *
 * The isConst flag selects read-only (const T*) or mutable (T*) access;
 * mutating members are SFINAE-disabled on read-only spans. The span never
 * owns the memory it points at.
 */
template <typename T, bool isConst = false> class ContiguousSpan
{
public:
  using pointer_type = typename std::conditional<isConst, const T *, T *>::type;
  using reference_type = typename std::conditional<isConst, const T &, T &>::type;
  using iterator_type = pointer_type;

  // Wraps [data, data + len); no ownership is taken.
  ContiguousSpan(pointer_type data, size_t len) noexcept : _data(data), _len(len) {}

  // Builds a span from an iterator pair; the iterators must point into
  // contiguous storage (e.g. std::vector), which this cannot verify.
  template <typename It>
  explicit ContiguousSpan(It first, It last) noexcept
    : _data(&*first), _len(std::distance(first, last))
  {
  }

  ContiguousSpan(const ContiguousSpan &) = delete;

  // Move leaves the source with a null data pointer.
  ContiguousSpan(ContiguousSpan &&s) noexcept : _data(s._data), _len(s._len) { s._data = nullptr; }

  // Implicit conversion from a mutable span to a read-only one.
  operator ContiguousSpan<T, true>() { return ContiguousSpan<T, true>{_data, _len}; }

  // Unchecked element access (at() performs no bounds check either).
  reference_type operator[](size_t idx) const noexcept { return _data[idx]; }

  reference_type at(size_t idx) const noexcept { return _data[idx]; }

  // Sub-span starting `offset` elements in; offset == _len yields an empty view.
  ContiguousSpan<T, isConst> offset(size_t offset)
  {
    assert(offset <= _len);
    return {_data + offset, _len - offset};
  }

  // Copies the contents of any iterable into this span (mutable spans only).
  // The caller guarantees the source fits; no bounds check is performed.
  template <typename From, bool _ = isConst>
  typename std::enable_if<!_, void>::type assign(const From &f) noexcept
  {
    assignFrom(std::begin(f), std::end(f));
  }

  // Element-wise assignment from an initializer list (mutable spans only).
  template <typename U, bool _ = isConst>
  typename std::enable_if<!_, ContiguousSpan &>::type
  operator=(std::initializer_list<U> list) noexcept
  {
    assignFrom(std::begin(list), std::end(list));
    return *this;
  }

  // Copies [first, last) into the span.
  // NOTE(review): relies on std::copy but this header does not include
  // <algorithm>; presumably pulled in transitively — confirm.
  template <typename It, bool _ = isConst>
  typename std::enable_if<!_, void>::type assignFrom(It first, It last) noexcept
  {
    std::copy(first, last, begin());
  }

  size_t size() const { return _len; }

  iterator_type begin() const { return iterator_type{_data}; }

  iterator_type end() const { return iterator_type{_data + _len}; }

  pointer_type data() { return _data; }

private:
  pointer_type _data; // borrowed, never freed here
  size_t _len;
};
+
+#ifndef NDARRAY_INLINE_TEMPLATES
+
+extern template class ContiguousSpan<float, true>;
+extern template class ContiguousSpan<float, false>;
+extern template class ContiguousSpan<int32_t, true>;
+extern template class ContiguousSpan<int32_t, false>;
+extern template class ContiguousSpan<uint32_t, true>;
+extern template class ContiguousSpan<uint32_t, false>;
+extern template class ContiguousSpan<uint8_t, true>;
+extern template class ContiguousSpan<uint8_t, false>;
+
+#endif // NDARRAY_INLINE_TEMPLATES
+
+} // namespace ndarray
+
+#endif //_NDARRAY_CONTIGNIOUS_SPAN_H_
diff --git a/runtime/libs/ndarray/include/ndarray/Shape.h b/runtime/libs/ndarray/include/ndarray/Shape.h
new file mode 100644 (file)
index 0000000..fa58613
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_SHAPE_H_
+#define _NDARRAY_SHAPE_H_
+
+#include "Common.h"
+
+#include <array>
+#include <cassert>
+#include <cstddef>
+
+namespace ndarray
+{
+
+class Shape
+{
+public:
+  //_dims{} here and later since array does not have std::initializer_list ctor
+  // and aggregate initialization is not allowed here
+  explicit Shape(size_t rank) noexcept : _dims{}, _rank(rank)
+  {
+    std::fill(_dims.begin(), _dims.end(), 0);
+  }
+
+  Shape(std::initializer_list<size_t> list) noexcept : _dims{}, _rank(list.size())
+  {
+    std::copy(list.begin(), list.end(), _dims.begin());
+  }
+
+  size_t dim(int i) const noexcept { return _dims.at(i); }
+
+  size_t &dim(int i) noexcept { return _dims.at(i); }
+
+  size_t element_count() const noexcept
+  {
+    uint32_t res = 1;
+    for (size_t i = 0; i < rank(); ++i)
+      res *= dim(i);
+    assert(res <= 0xffffffff);
+    return res;
+  }
+
+  size_t rank() const noexcept { return _rank; }
+
+private:
+  std::array<size_t, NDARRAY_MAX_DIMENSION_COUNT> _dims;
+  size_t _rank;
+};
+
+} // namespace ndarray
+
+#endif //_NDARRAY_SHAPE_H_
diff --git a/runtime/libs/ndarray/src/Array.cpp b/runtime/libs/ndarray/src/Array.cpp
new file mode 100644 (file)
index 0000000..f9c9de9
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/Array.h"
+
namespace ndarray
{

// Explicit instantiations for the common element types; the matching
// `extern template` declarations live in Array.h (unless
// NDARRAY_INLINE_TEMPLATES is defined).
template class Array<float>;
template class Array<int32_t>;
template class Array<uint32_t>;
template class Array<uint8_t>;

} // namespace ndarray
diff --git a/runtime/libs/ndarray/src/ContiguousSpan.cpp b/runtime/libs/ndarray/src/ContiguousSpan.cpp
new file mode 100644 (file)
index 0000000..e06cfc2
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ndarray/ContiguousSpan.h"
+
namespace ndarray
{

// Explicit instantiations (both const and mutable flavors) for the common
// element types; the matching `extern template` declarations live in
// ContiguousSpan.h (unless NDARRAY_INLINE_TEMPLATES is defined).
template class ContiguousSpan<float, true>;
template class ContiguousSpan<float, false>;
template class ContiguousSpan<int32_t, true>;
template class ContiguousSpan<int32_t, false>;
template class ContiguousSpan<uint32_t, true>;
template class ContiguousSpan<uint32_t, false>;
template class ContiguousSpan<uint8_t, true>;
template class ContiguousSpan<uint8_t, false>;

} // namespace ndarray
diff --git a/runtime/libs/ndarray/src/detail/cxx14.h b/runtime/libs/ndarray/src/detail/cxx14.h
new file mode 100644 (file)
index 0000000..8b78fb9
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NDARRAY_CXX14_H_
+#define _NDARRAY_CXX14_H_
+
+namespace ndarray
+{
+
+namespace cxx14
+{
+
// Minimal C++11 stand-ins for std::index_sequence / std::make_index_sequence
// (C++14 features); Array.h aliases these when __cplusplus < 201402L.
// NOTE(review): this header uses size_t / std::size_t without including
// <cstddef>; presumably pulled in transitively by the including header —
// confirm.
template <size_t... Nums> struct index_sequence
{
  using value_type = size_t;

  static constexpr std::size_t size() noexcept { return sizeof...(Nums); }
};

namespace detail
{

// Appends index `v` to an existing index_sequence.
template <size_t v, typename Seq> struct _append;

template <size_t v, size_t... Nums> struct _append<v, index_sequence<Nums...>>
{
  using result = index_sequence<Nums..., v>;
};

// Recursively builds index_sequence<0, 1, ..., Len - 1>.
template <size_t Len> struct make_index_sequence
{
  using result =
    typename detail::_append<Len - 1, typename make_index_sequence<Len - 1>::result>::result;
};

template <> struct make_index_sequence<1>
{
  using result = index_sequence<0>;
};

template <> struct make_index_sequence<0>
{
  using result = index_sequence<>;
};

} // namespace detail

// Public alias mirroring std::make_index_sequence.
template <size_t Num> using make_index_sequence = typename detail::make_index_sequence<Num>::result;
+
+} // namespace cxx14
+
+} // namespace ndarray
+
+#endif //_NDARRAY_CXX14_H_
diff --git a/runtime/libs/ndarray/test/CMakeLists.txt b/runtime/libs/ndarray/test/CMakeLists.txt
new file mode 100644 (file)
index 0000000..be1ed65
--- /dev/null
@@ -0,0 +1,18 @@
# Standalone unit tests for the ndarray library; skipped when the ndarray
# target is not built or GTest is unavailable.
if(NOT TARGET ndarray)
    return()
endif()

add_executable(ndarray_test ndarray_test.cpp)

target_link_libraries(ndarray_test PRIVATE ndarray)

nnfw_find_package(GTest)
if(NOT GTest_FOUND)
    # Fixed typo in the user-visible status message ("avaialble").
    message(STATUS "GTest not available. Skipping NDArray test build")
    return()
endif(NOT GTest_FOUND)

target_link_libraries(ndarray_test PUBLIC gtest gtest_main ${LIB_PTHREAD})

add_test(ndarray_test ndarray_test)
install(TARGETS ndarray_test DESTINATION unittest_standalone)
diff --git a/runtime/libs/ndarray/test/ndarray_test.cpp b/runtime/libs/ndarray/test/ndarray_test.cpp
new file mode 100644 (file)
index 0000000..4b5ad57
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include "ndarray/Array.h"
+
+using namespace ndarray;
+
// Element access, shape queries, flat() views and move construction over a
// shared raw buffer viewed with two different shapes.
TEST(NDArray_tests, basic_data_test)
{

  float raw_data[] = {1, 2, 3, 4};

  // Same four floats viewed as 2x2.
  Array<float> data22{raw_data, {2, 2}};

  ASSERT_FLOAT_EQ(data22.at(0, 0), 1);
  ASSERT_FLOAT_EQ(data22.at(0, 1), 2);
  ASSERT_FLOAT_EQ(data22.at(1, 0), 3);
  ASSERT_FLOAT_EQ(data22.at(1, 1), 4);
  ASSERT_EQ(data22.shape().rank(), 2);
  ASSERT_EQ(data22.shape().dim(0), 2);
  ASSERT_EQ(data22.shape().dim(1), 2);

  // ... and as 1x4 (views alias the buffer, so both coexist).
  Array<float> data14{raw_data, {1, 4}};
  ASSERT_FLOAT_EQ(data14.at(0, 0), 1);
  ASSERT_FLOAT_EQ(data14.at(0, 1), 2);
  ASSERT_FLOAT_EQ(data14.at(0, 2), 3);
  ASSERT_FLOAT_EQ(data14.at(0, 3), 4);
  ASSERT_EQ(data14.shape().rank(), 2);
  ASSERT_EQ(data14.shape().dim(0), 1);
  ASSERT_EQ(data14.shape().dim(1), 4);

  // flat() exposes the whole buffer as one contiguous span.
  ContiguousSpan<float> cs = data22.flat();
  ASSERT_EQ(cs.size(), 4);
  ASSERT_FLOAT_EQ(cs.at(3), 4);

  // Move construction transfers the view; data stays readable through it.
  Array<float> lv = std::move(data14);
  ASSERT_FLOAT_EQ(lv.at(0, 0), 1);
  ASSERT_FLOAT_EQ(lv.at(0, 1), 2);
  ASSERT_FLOAT_EQ(lv.at(0, 2), 3);
  ASSERT_FLOAT_EQ(lv.at(0, 3), 4);
}
+
// Writing through slice(): assigning an initializer list to row 1 must not
// touch row 0.
TEST(NDArray_tests, slice_write_test)
{
  float raw_data[4] = {0};

  Array<float> data22{raw_data, {2, 2}};

  data22.slice(1) = {1, 2};

  ASSERT_FLOAT_EQ(data22.at(0, 0), 0);
  ASSERT_FLOAT_EQ(data22.at(0, 1), 0);
  ASSERT_FLOAT_EQ(data22.at(1, 0), 1);
  ASSERT_FLOAT_EQ(data22.at(1, 1), 2);
}
+
// Reading through slice(): slice(1) must view the second row of a 2x2 array.
TEST(NDArray_tests, slice_read_test)
{
  float raw_data[4] = {1, 2, 3, 4};

  Array<float> data22{raw_data, {2, 2}};

  auto slice = data22.slice(1);

  ASSERT_FLOAT_EQ(slice[0], 3);
  ASSERT_FLOAT_EQ(slice[1], 4);
}
+
// at() with a 5-dimensional shape: indexing must address the innermost
// dimension correctly when all outer extents are 1.
TEST(NDArray_tests, multidim_test)
{
  float raw_data[5] = {0, 1, 2, 3, 4};

  Array<float> data22{raw_data, {1, 1, 1, 1, 5}};

  ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 0), 0);
  ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 1), 1);
  ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 2), 2);
  ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 3), 3);
  ASSERT_FLOAT_EQ(data22.at(0, 0, 0, 0, 4), 4);
}
+
+TEST(NDArray_tests, slice_assign_test)
+{
+  std::vector<float> v1{1, 2, 3, 4, 5};
+  std::vector<float> v2(5);
+
+  ContiguousSpan<float> span1(v1.begin(), v1.end());
+  ContiguousSpan<float> span2(v2.begin(), v2.end());
+
+  span2.assign(span1);
+
+  ASSERT_EQ(v1, v2);
+  ASSERT_EQ(span1.size(), 5);
+  ASSERT_EQ(span2.size(), 5);
+
+  ASSERT_EQ(span2.at(2), 3);
+  ASSERT_EQ(span2.at(4), 5);
+
+  ASSERT_EQ(*(span1.data() + 2), *(span1.data() + 2));
+
+  ContiguousSpan<float> span3(span2.offset(1));
+  ASSERT_EQ(span3.size(), 4);
+  ASSERT_EQ(span3.at(0), 2);
+  ASSERT_EQ(span3.at(1), 3);
+  ASSERT_EQ(span3.at(2), 4);
+  ASSERT_EQ(span3.at(3), 5);
+}
index 6624ae6..4fce291 100644 (file)
@@ -21,6 +21,6 @@
  * NNFW_VERSION is a uint32 value representing nnfw runtime version
  * in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch
  */
-#define NNFW_VERSION 0x01001100
+#define NNFW_VERSION 0x01001200
 
 #endif // __NNFW_VERSION_H__
index 1e54432..b61e582 100644 (file)
@@ -12,6 +12,7 @@ target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_common)
 target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE nnfw_coverage)
 target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ruy)
 target_link_libraries(${LIB_ONERT_BACKEND_CPU} INTERFACE ruy_instrumentation)
+target_link_libraries(${LIB_ONERT_BACKEND_CPU} PRIVATE ndarray)
 
 set_target_properties(${LIB_ONERT_BACKEND_CPU} PROPERTIES OUTPUT_NAME backend_cpu)
 
index 59fb68d..75274dc 100644 (file)
@@ -35,6 +35,7 @@
 #include "ops/GatherLayer.h"
 #include "ops/LSTMLayer.h"
 #include "ops/MeanLayer.h"
+#include "ops/DetectionPostProcessLayer.h"
 #include "ops/OneHotLayer.h"
 #include "ops/OperationUtils.h"
 #include "ops/PackLayer.h"
@@ -1177,6 +1178,51 @@ void KernelGenerator::visit(const ir::operation::MatrixBandPart &node)
   _return_fn = std::move(fn);
 }
 
// Builds the CPU DetectionPostProcess (NMS-style) kernel from the IR node:
// copies scale/threshold/limit parameters, wires the three inputs
// (boxes, scores, anchors) and the four outputs (classes, coords, scores,
// number of selections), then hands the configured layer back as _return_fn.
void KernelGenerator::visit(const ir::operation::DetectionPostProcess &node)
{
  using NMS = ir::operation::DetectionPostProcess;

  ops::DetectionPostProcessLayer::DetectionPostProcessParameters parameters;
  // Box-decoding scales (y, x, w, h) taken from the node's scale parameters.
  parameters.scales.y = node.param().scale.y_scale;
  parameters.scales.x = node.param().scale.x_scale;
  parameters.scales.w = node.param().scale.w_scale;
  parameters.scales.h = node.param().scale.h_scale;

  // Thresholds and per-class / total detection limits.
  parameters.iou_threshold = node.param().iou_threshold;
  parameters.score_threshold = node.param().score_threshold;
  parameters.max_boxes_per_class = node.param().max_boxes_per_class;
  parameters.max_detections = node.param().max_detections;
  parameters.num_classes = node.param().num_classes;
  parameters.center_box_format = node.param().center_size_boxes;
  parameters.max_classes_per_detection = node.param().max_classes_per_detection;

  auto boxes_index = node.getInputs().at(NMS::Input::BOXES);
  auto scores_index = node.getInputs().at(NMS::Input::SCORES);
  auto anchors_index = node.getInputs().at(NMS::Input::INPUT_ANCHORS);

  auto o_classes_index = node.getOutputs().at(NMS::Output::BOX_CLASSES);
  auto o_coords_index = node.getOutputs().at(NMS::Output::BOX_COORDS);
  auto o_scores_index = node.getOutputs().at(NMS::Output::BOX_SCORES);
  auto o_num_selected_index = node.getOutputs().at(NMS::Output::NUM_SELECTED);

  // Shape descriptors for the two ranked inputs.
  // NOTE(review): "scrores_descr" spelling matches the parameter struct's
  // field declared in DetectionPostProcessLayer.h — cannot be fixed here.
  parameters.boxes_descr = _ctx.at(boxes_index).shape().dims();
  parameters.scrores_descr = _ctx.at(scores_index).shape().dims();

  parameters.boxes_input = _tensor_reg->getPortableTensor(boxes_index);
  parameters.scores_input = _tensor_reg->getPortableTensor(scores_index);
  parameters.anchors_input = _tensor_reg->getPortableTensor(anchors_index);

  parameters.box_classes_output = _tensor_reg->getPortableTensor(o_classes_index);
  parameters.box_coords_output = _tensor_reg->getPortableTensor(o_coords_index);
  parameters.box_scores_output = _tensor_reg->getPortableTensor(o_scores_index);
  parameters.num_selections_output = _tensor_reg->getPortableTensor(o_num_selected_index);

  auto fn = std::make_unique<ops::DetectionPostProcessLayer>();
  fn->configure(std::move(parameters));

  _return_fn = std::move(fn);
}
+
 void KernelGenerator::visit(const ir::operation::BatchMatMul &node)
 {
   const auto output_index{node.getOutputs().at(0)};
index d452d0b..d7d5fe6 100644 (file)
@@ -69,6 +69,7 @@ public:
   void visit(const ir::operation::LogSoftmax &) override;
   void visit(const ir::operation::LSTM &) override;
   void visit(const ir::operation::MatrixBandPart &) override;
+  void visit(const ir::operation::DetectionPostProcess &) override;
   void visit(const ir::operation::OneHot &) override;
   void visit(const ir::operation::Pack &) override;
   void visit(const ir::operation::Pad &) override;
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.cc
new file mode 100644 (file)
index 0000000..8a6fe65
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "DetectionPostProcessLayer.h"

#include "ndarray/Array.h"

#include <algorithm>
#include <cmath>
#include <numeric>
#include <utility>
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
+namespace
+{
+
+using namespace ndarray;
+
+using CenterSizeBox = DetectionPostProcessLayer::CenterSizeBox;
+using CornerBox = DetectionPostProcessLayer::CornerBox;
+
+using NonMaxSuppressionParam = DetectionPostProcessLayer::DetectionPostProcessParameters;
+using Allocations = DetectionPostProcessLayer::Allocations;
+
+struct OutputArrays
+{
+  OutputArrays(CornerBox *coords_buf, float *scores_buf, float *classes_buf,
+               int *num_selections_buf, size_t max_detections)
+    : coords(coords_buf, {max_detections}), scores(scores_buf, {max_detections}),
+      classes(classes_buf, {max_detections}), num_selections(num_selections_buf, {1})
+  {
+  }
+
+  Array<CornerBox> coords;
+  Array<float> scores;
+  Array<float> classes;
+  Array<int> num_selections;
+};
+
+struct TemporaryArrays
+{
+  TemporaryArrays(int *selections_buffer, int max_detections)
+    : selections(selections_buffer, {static_cast<unsigned long>(max_detections)})
+  {
+  }
+
+  Array<int> selections;
+};
+
+// sort indices in decreasing order of first `k` scores
+void PartialArgSort(const ContiguousSpan<float, true> &scores,
+                    const ContiguousSpan<int, false> &indices, int k)
+{
+  std::iota(indices.begin(), indices.begin() + k, 0);
+  std::partial_sort(indices.begin(), indices.begin() + k, indices.begin() + scores.size(),
+                    [&scores](const int i, const int j) { return scores[i] > scores[j]; });
+}
+
+template <typename T> ContiguousSpan<T, false> static vecToSpan(std::vector<T> &v)
+{
+  return ContiguousSpan<T, false>{v.begin(), v.end()};
+}
+
+Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<float> &raw_anchors,
+                                   bool center_box_format, const CenterSizeBox &scales)
+{
+  auto nbatches = raw_boxes.shape().dim(0);
+  auto num_boxes = raw_boxes.shape().dim(1);
+
+  auto anchors = array_cast<const CenterSizeBox>(raw_anchors, {num_boxes});
+
+  if (!center_box_format)
+  {
+    auto boxes_p = reinterpret_cast<const CornerBox *>(raw_boxes.flat().data());
+    return {boxes_p, {num_boxes}};
+  }
+  else
+  {
+    // TODO support box center-width encoding correctly
+    // i.e anchors
+    auto boxes_p = reinterpret_cast<const CenterSizeBox *>(raw_boxes.flat().data());
+    Array<const CenterSizeBox> in_boxes{boxes_p, {num_boxes}};
+
+    auto decoded_boxes_p = new CornerBox[nbatches * num_boxes];
+    Array<CornerBox> decoded_boxes_a{decoded_boxes_p, {num_boxes}};
+
+    for (size_t i = 0; i < num_boxes; ++i)
+    {
+      auto anchor = anchors.at(i);
+      auto &box = decoded_boxes_a.at(i);
+      float yc = in_boxes.at(i).y / scales.y * anchor.h + anchor.y;
+      float xc = in_boxes.at(i).x / scales.x * anchor.w + anchor.x;
+      float halfh = 0.5f * std::exp(in_boxes.at(i).h / scales.h) * anchor.h;
+      float halfw = 0.5f * std::exp(in_boxes.at(i).w / scales.w) * anchor.w;
+      box.x1 = xc - halfw;
+      box.x2 = xc + halfw;
+      box.y1 = yc - halfh;
+      box.y2 = yc + halfh;
+
+      assert(box.x2 > box.x1);
+      assert(box.y2 > box.y1);
+    }
+
+    return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a.shape());
+  }
+}
+
+float computeIOU(const CornerBox &box1, const CornerBox &box2)
+{
+  float area_i = (box1.y2 - box1.y1) * (box1.x2 - box1.x1);
+  float area_j = (box2.y2 - box2.y1) * (box2.x2 - box2.x1);
+  if (area_i <= 0 || area_j <= 0)
+  {
+    return 0.0;
+  }
+  float in_ymin = std::max<float>(box1.y1, box2.y1);
+  float in_xmin = std::max<float>(box1.x1, box2.x1);
+  float in_ymax = std::min<float>(box1.y2, box2.y2);
+  float in_xmax = std::min<float>(box1.x2, box2.x2);
+  float in_area = std::max<float>(in_ymax - in_ymin, 0.0) * std::max<float>(in_xmax - in_xmin, 0.0);
+
+  return in_area / (area_i + area_j - in_area);
+}
+
// Single-class non-max suppression: greedily keeps the highest-scoring boxes,
// suppressing every remaining box whose IOU with a kept box exceeds
// param.iou_threshold. Writes the surviving box indices into temps.selections
// and returns how many were kept (at most max_detections).
int doSingleClass(const Array<const CornerBox> &boxes, const std::vector<float> &scores,
                  const NonMaxSuppressionParam &param, TemporaryArrays &temps,
                  size_t max_detections)
{
  auto num_boxes = boxes.shape().dim(0);

  // Box indices ordered by decreasing score.
  std::vector<int> sorted_box_indices(num_boxes);
  PartialArgSort(ContiguousSpan<float, true>(scores.data(), num_boxes),
                 vecToSpan(sorted_box_indices), num_boxes);

  // TODO move to temp allocations
  // process_box[i] becomes 0 once box i has been suppressed.
  std::vector<int> process_box(num_boxes, 1);

  size_t selected_count = 0;
  for (size_t i = 0; i < num_boxes; ++i)
  {
    auto box_index = sorted_box_indices[i];

    // Skip boxes already suppressed or below the score threshold.
    if (!process_box[box_index] || scores[box_index] < param.score_threshold)
    {
      continue;
    }

    temps.selections.at(selected_count) = box_index;
    selected_count++;

    if (selected_count >= max_detections)
    {
      break;
    }

    // Suppress all lower-scoring boxes that overlap this one too strongly.
    for (size_t j = i + 1; j < num_boxes; ++j)
    {
      if (!process_box[sorted_box_indices[j]])
      {
        continue;
      }

      float IOU = computeIOU(boxes.at(box_index), boxes.at(sorted_box_indices[j]));
      if (IOU > param.iou_threshold)
      {
        process_box[sorted_box_indices[j]] = 0;
      }
    }
  }

  return selected_count;
}
+
+void collectBoxes(TemporaryArrays &temporary, const Array<const CornerBox> &decoded_boxes,
+                  std::vector<float> &scores, int num_selected, OutputArrays &output,
+                  const Array<int> &sorted_classes, int detections_per_box)
+{
+  auto &selections = temporary.selections;
+
+  size_t output_box_count = 0;
+
+  for (int i = 0; i < num_selected; ++i)
+  {
+    int selected_box = selections.at(output_box_count);
+
+    for (int c = 0; c < detections_per_box; ++c)
+    {
+      output.classes.at(output_box_count) = sorted_classes.at(selected_box, c);
+      output.scores.at(output_box_count) = scores[selected_box];
+      output.coords.at(output_box_count) = decoded_boxes.at(selected_box);
+      output_box_count++;
+    }
+  }
+}
+
// Full detection post-process pipeline for a single batch:
//  1) for every box, sort its class scores and remember the best one,
//  2) decode raw box predictions into corner-format boxes,
//  3) run single-class NMS on the best scores,
//  4) gather the surviving boxes into the output arrays,
//  5) store the number of selections.
void DetectionPostProcess(const Array<float> &boxes_a, const Array<float> &scores_a,
                          Array<float> &num_selected_a, const NonMaxSuppressionParam &param,
                          const Allocations &allocations, OutputArrays &outputs)
{
  TemporaryArrays temporary(allocations.selections_buffer, param.max_detections);

  // Only batch of 1 is supported atm
  auto num_boxes = boxes_a.shape().dim(1);
  size_t num_classes = param.num_classes;
  size_t num_classes_with_background = scores_a.shape().dim(2);
  // An extra leading score column, when present, is the background class.
  bool have_background = num_classes_with_background != num_classes;

  size_t max_classes_per_box = std::min<size_t>(num_classes, param.max_classes_per_detection);

  // TODO move this to allocations
  std::vector<int> sorted_class_indices(num_boxes * num_classes);

  Array<int> class_indices(sorted_class_indices.data(), {num_boxes, num_classes});

  // TODO move to allocations
  std::vector<float> max_scores(num_boxes);

  // Per box: rank class scores (skipping background) and record the top score.
  for (size_t row = 0; row < num_boxes; row++)
  {
    auto box_scores = scores_a.slice(0, row).offset(have_background ? 1 : 0);
    auto indices = class_indices.slice(row);

    // NOTE(review): if the background column is NOT trimmed by offset(),
    // box_scores is one longer than `indices` — confirm PartialArgSort stays
    // within the `indices` span in that case.
    PartialArgSort(box_scores, indices, num_classes);

    max_scores[row] = box_scores[indices[0]];
  }

  // Anchors arrive as a flat (num_boxes, 4) float tensor.
  auto anchors_a =
    Array<float>(reinterpret_cast<float *>(param.anchors_input->buffer()), {num_boxes, 4});
  auto decoded_boxes = decodeBoxes(boxes_a, anchors_a, param.center_box_format, param.scales);

  int num_selected =
    doSingleClass(decoded_boxes, max_scores, param, temporary, param.max_detections);

  collectBoxes(temporary, decoded_boxes, max_scores, num_selected, outputs, class_indices,
               max_classes_per_box);

  num_selected_a.at(0) = num_selected;
}
+} // namespace
+
+template <typename T> Array<T> toArray(uint8_t *ptr, std::vector<int32_t> &descr)
+{
+  ndarray::Shape shape(descr.size());
+  for (size_t i = 0; i < descr.size(); ++i)
+  {
+    shape.dim(i) = descr[i];
+  }
+
+  return Array<T>{reinterpret_cast<T *>(ptr), shape};
+}
+
+void DetectionPostProcessLayer::configure(DetectionPostProcessParameters parameters)
+{
+  _parameters = std::move(parameters);
+  _allocations.selections_buffer = new int[_parameters.max_detections * 2];
+}
+
// Execute detection post-processing: wrap the input/output tensor buffers in
// array views and run the decode + NMS pipeline.
void DetectionPostProcessLayer::run()
{
  auto nbatches = (unsigned int)_parameters.boxes_descr[0];
  // No support for batches other than 1 (fine, since tflite does not support
  // batching for postprocess either).
  assert(nbatches == 1);

  auto boxes_a = toArray<float>(_parameters.boxes_input->buffer(), _parameters.boxes_descr);
  auto scores_a = toArray<float>(_parameters.scores_input->buffer(), _parameters.scrores_descr);

  // NOTE(review): the selection count is written through this float view, while
  // OutputArrays below aliases the same buffer as int* — confirm the output
  // tensor's dtype matches the float write in DetectionPostProcess().
  auto num_selected_a = ndarray::Array<float>(
    reinterpret_cast<float *>(_parameters.num_selections_output->buffer()), {nbatches});

  OutputArrays outputArrays(reinterpret_cast<CornerBox *>(_parameters.box_coords_output->buffer()),
                            reinterpret_cast<float *>(_parameters.box_scores_output->buffer()),
                            reinterpret_cast<float *>(_parameters.box_classes_output->buffer()),
                            reinterpret_cast<int *>(_parameters.num_selections_output->buffer()),
                            _parameters.max_detections);

  DetectionPostProcess(boxes_a, scores_a, num_selected_a, _parameters, _allocations, outputArrays);
}
+
+DetectionPostProcessLayer::~DetectionPostProcessLayer() { delete[] _allocations.selections_buffer; }
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h b/runtime/onert/backend/cpu/ops/DetectionPostProcessLayer.h
new file mode 100644 (file)
index 0000000..836a70c
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_CPU_OPS_DPP_H__
+#define __ONERT_BACKEND_CPU_OPS_DPP_H__
+
+#include <exec/IFunction.h>
+
+#include "OperationUtils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace cpu
+{
+namespace ops
+{
+
// CPU kernel implementing TFLite's Detection_PostProcess custom op: decodes
// raw box predictions against anchors and applies non-max suppression.
class DetectionPostProcessLayer : public ::onert::exec::IFunction
{
public:
  // Box given by its two opposite corners.
  struct CornerBox
  {
    float y1, x1;
    float y2, x2;
  };

  // Box given by its center point and extents (also used for decode scales).
  struct CenterSizeBox
  {
    float y, x;
    float h, w;
  };

  // Everything configure() needs: tensor bindings, shapes and NMS parameters.
  struct DetectionPostProcessParameters
  {
    const IPortableTensor *boxes_input;
    const IPortableTensor *scores_input;
    const IPortableTensor *anchors_input;
    IPortableTensor *box_coords_output;
    IPortableTensor *box_classes_output;
    IPortableTensor *box_scores_output;
    IPortableTensor *num_selections_output;
    // Dimensions of the boxes / scores inputs, one entry per axis.
    std::vector<int32_t> boxes_descr;
    std::vector<int32_t> scrores_descr;

    uint32_t max_detections;
    float score_threshold;
    float iou_threshold; // intersection-over-union
    uint32_t max_boxes_per_class;
    // true when boxes arrive center-size encoded and must be decoded vs anchors
    bool center_box_format = false;
    int32_t num_classes;
    int32_t max_classes_per_detection;
    CenterSizeBox scales; // y/x/h/w decode scales
  };

  // Layout of entries in the selections buffer.
  // NOTE(review): current NMS code stores a single box index per slot;
  // confirm whether pair storage is still planned.
  enum SelectionFormat
  {
    BOX_INDEX = 1,
    CLASS_INDEX = 0
  };

  // Buffers owned by this layer (freed in the destructor).
  struct Allocations
  {
    int *selections_buffer = nullptr;
    // TODO move all dynamic allocations here, and into configure phase
  };

  DetectionPostProcessLayer() : _parameters{}
  {
    // DO NOTHING
  }

  virtual ~DetectionPostProcessLayer();

public:
  void configure(DetectionPostProcessParameters parameters);

  void run() override;

private:
  DetectionPostProcessParameters _parameters;

  Allocations _allocations;
};
+
+} // namespace ops
+} // namespace cpu
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_CPU_OPS_DPP_H__
index 2e484e6..b2272e2 100644 (file)
@@ -112,6 +112,7 @@ private:
   void visit(const ir::operation::Transpose &op) override;
   void visit(const ir::operation::Unpack &op) override;
   void visit(const ir::operation::While &op) override;
+  void visit(const ir::operation::DetectionPostProcess &op) override;
 
 private:
   /**
index 3d040e2..f814b78 100644 (file)
@@ -67,6 +67,7 @@ public:
   void visit(const ir::operation::L2Normalization &op) override;
   void visit(const ir::operation::LSTM &op) override;
   void visit(const ir::operation::MatrixBandPart &op) override;
+  void visit(const ir::operation::DetectionPostProcess &op) override;
   void visit(const ir::operation::OneHot &op) override;
   void visit(const ir::operation::Pack &op) override;
   void visit(const ir::operation::Pad &op) override;
index 45fadc4..0eb45e1 100644 (file)
@@ -50,6 +50,7 @@
 #include "ir/operation/LogSoftmax.h"
 #include "ir/operation/LSTM.h"
 #include "ir/operation/MatrixBandPart.h"
+#include "ir/operation/DetectionPostProcess.h"
 #include "ir/operation/OneHot.h"
 #include "ir/operation/Pack.h"
 #include "ir/operation/Pad.h"
index 7f3c40b..f17fdfd 100644 (file)
@@ -53,6 +53,7 @@ OP(LocalResponseNormalization)
 OP(LogSoftmax)
 OP(LSTM)
 OP(MatrixBandPart)
+OP(DetectionPostProcess)
 OP(OneHot)
 OP(Pack)
 OP(Pad)
diff --git a/runtime/onert/core/include/ir/operation/DetectionPostProcess.h b/runtime/onert/core/include/ir/operation/DetectionPostProcess.h
new file mode 100644 (file)
index 0000000..becb0e2
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
+#define __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
+
+#include "ir/Operation.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
// IR node for TFLite's Detection_PostProcess custom op.
// Inputs: raw boxes, class scores and anchors; outputs: decoded box
// coordinates, classes, scores and the number of selected detections.
class DetectionPostProcess : public Operation
{
public:
  enum Input
  {
    BOXES = 0,
    SCORES = 1,
    INPUT_ANCHORS = 2
  };

  enum Output
  {
    BOX_COORDS = 0,
    BOX_CLASSES = 1,
    BOX_SCORES = 2,
    NUM_SELECTED = 3
  };

  // Decode scales applied to the center-size box encoding.
  struct Scale
  {
    float y_scale;
    float x_scale;
    float h_scale;
    float w_scale;
  };

  struct Param
  {
    int max_detections;
    float score_threshold;
    float iou_threshold; // intersection-over-union
    int max_boxes_per_class;
    int32_t num_classes;
    int32_t max_classes_per_detection;
    // N*N complexity instead of N*N*M, where N - number of boxes and M number of classes
    bool center_size_boxes;
    // true selects the fast (non-regular) NMS path
    bool do_fast_eval = true;
    Scale scale;
  };

public:
  DetectionPostProcess(const OperandIndexSequence &inputs, const OperandIndexSequence &outputs,
                       const Param &param);

public:
  void accept(OperationVisitor &v) const override;

  std::string getName() const { return "DetectionPostProcess"; }

public:
  const Param &param() const { return _param; }
  OpCode opcode() const final { return OpCode::DetectionPostProcess; }

private:
  Param _param;
};
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
+
+#endif // __NEURUN_MODEL_OPERATION_DETECTION_POST_PROCESS_NODE_H__
index 5849a98..f2fee2c 100644 (file)
@@ -1302,6 +1302,30 @@ void StaticShapeInferer::visit(const ir::operation::While &op)
   }
 }
 
+void StaticShapeInferer::visit(const ir::operation::DetectionPostProcess &op)
+{
+  // TODO: NMS supports very limited input/output size.
+  ir::operation::DetectionPostProcess::Param param = op.param();
+
+  const int num_detected_boxes = param.max_detections * param.max_classes_per_detection;
+
+  const auto output_idx1 = op.getOutputs().at(0);
+  auto &output1 = _operands.at(output_idx1);
+  output1.info().shape({1, num_detected_boxes, 4});
+
+  const auto output_idx2 = op.getOutputs().at(1);
+  auto &output2 = _operands.at(output_idx2);
+  output2.info().shape({1, num_detected_boxes});
+
+  const auto output_idx3 = op.getOutputs().at(2);
+  auto &output3 = _operands.at(output_idx3);
+  output3.info().shape({1, num_detected_boxes});
+
+  const auto output_idx4 = op.getOutputs().at(3);
+  auto &output4 = _operands.at(output_idx4);
+  output4.info().shape({1});
+}
+
 } // namespace compiler
 
 } // namespace onert
index dbf4eb2..fb8058d 100644 (file)
@@ -601,6 +601,14 @@ void DynamicShapeInferer::visit(const ir::operation::MatrixBandPart &op)
   handleSimpleUnaryOp(op, op.getInputs().at(ir::operation::MatrixBandPart::INPUT));
 }
 
// Intentionally empty: DetectionPostProcess output shapes are fixed at compile
// time by the static shape inferer — they depend only on op parameters, not on
// runtime input shapes, so there is nothing to infer dynamically.
void DynamicShapeInferer::visit(const ir::operation::DetectionPostProcess & /* op */)
{
  // NOTE DetectionPostProcess's undefined outputs' shape are decided on compile time
  //      by static shape inferer.
  //      DetectionPostProcess's outputs' shape are independent with input shape
  //      and decided by parameter value.
}
+
 void DynamicShapeInferer::visit(const ir::operation::OneHot &op)
 {
   auto output_ind = op.getOutputs().at(0);
index 705a37e..094dbc0 100644 (file)
@@ -211,6 +211,14 @@ void OperationValidator::visit(const operation::DepthToSpace &node)
   OP_REQUIRES(block_size > 0);
 }
 
+void OperationValidator::visit(const operation::DetectionPostProcess &node)
+{
+  auto param = node.param();
+
+  // FIXME: number of classes should be 1 for now.
+  OP_REQUIRES(param.num_classes == 1);
+}
+
 void OperationValidator::visit(const operation::DepthwiseConv2D &node)
 {
   const auto input_index{node.getInputs().at(operation::DepthwiseConv2D::Input::INPUT)};
index 9829ca0..b9bcc4e 100644 (file)
@@ -55,6 +55,7 @@ public:
   void visit(const operation::Conv2D &node) override;
   void visit(const operation::DepthToSpace &node) override;
   void visit(const operation::DepthwiseConv2D &node) override;
+  void visit(const operation::DetectionPostProcess &node) override;
   void visit(const operation::ElementwiseActivation &node) override;
   void visit(const operation::ElementwiseBinary &node) override;
   void visit(const operation::ElementwiseUnary &node) override;
diff --git a/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc b/runtime/onert/core/src/ir/operation/DetectionPostProcess.cc
new file mode 100644 (file)
index 0000000..cd70879
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/operation/DetectionPostProcess.h"
+#include "ir/OperationVisitor.h"
+
+namespace onert
+{
+namespace ir
+{
+namespace operation
+{
+
// Construct the node; exactly 3 inputs are required (BOXES, SCORES, INPUT_ANCHORS).
DetectionPostProcess::DetectionPostProcess(const OperandIndexSequence &inputs,
                                           const OperandIndexSequence &outputs, const Param &param)
  : Operation(OperandConstraint::createExact(3u), inputs, outputs), _param(param)
{
}

// Standard visitor dispatch.
void DetectionPostProcess::accept(OperationVisitor &v) const { v.visit(*this); }
+
+} // namespace operation
+} // namespace ir
+} // namespace onert
index c444e73..6ba7ee9 100644 (file)
@@ -142,6 +142,7 @@ private:
   void loadIf(const Operator *op, ir::Graph &subg);
   void loadLeakyRelu(const Operator *op, ir::Graph &subg);
   void loadLogSoftmax(const Operator *op, ir::Graph &subg);
+  void loadDetectionPostProcess(const Operator *op, ir::Graph &subg);
   void loadOneHot(const Operator *op, ir::Graph &subg);
   void loadPack(const Operator *op, ir::Graph &subg);
   void loadPool2D(const Operator *op, ir::Graph &subg, ir::operation::Pool2D::PoolType op_type);
@@ -928,6 +929,45 @@ void BaseLoader<LoaderDomain>::loadGather(const Operator *op, ir::Graph &subg)
 }
 
 template <typename LoaderDomain>
// Load the TFLite_Detection_PostProcess custom op: its parameters are encoded
// as a flexbuffer map in the operator's custom_options.
template <typename LoaderDomain>
void BaseLoader<LoaderDomain>::loadDetectionPostProcess(const Operator *op, ir::Graph &subg)
{
  const flexbuffers::Map &m =
    flexbuffers::GetRoot(op->custom_options()->data(), op->custom_options()->size()).AsMap();

  ir::operation::DetectionPostProcess::Param param;

  // NOTE(review): keys other than detections_per_class / use_regular_nms are
  // read without IsNull checks — missing keys would yield flexbuffer defaults
  // (0 / 0.0); confirm all are guaranteed present in the converter output.
  param.max_detections = m["max_detections"].AsInt32();

  // TODO fixme
  param.max_classes_per_detection = m["max_classes_per_detection"].AsInt32();
  // Optional key: default to 100 boxes per class, matching TFLite.
  if (m["detections_per_class"].IsNull())
    param.max_boxes_per_class = 100;
  else
    param.max_boxes_per_class = m["detections_per_class"].AsInt32();

  // Optional key: fast (non-regular) NMS is the default.
  if (m["use_regular_nms"].IsNull())
    param.do_fast_eval = true;
  else
    param.do_fast_eval = !m["use_regular_nms"].AsBool();

  param.score_threshold = m["nms_score_threshold"].AsFloat();
  param.iou_threshold = m["nms_iou_threshold"].AsFloat();

  // TODO add num classes support
  param.num_classes = m["num_classes"].AsInt32();

  // Decode scales for the center-size box encoding.
  param.scale.y_scale = m["y_scale"].AsFloat();
  param.scale.x_scale = m["x_scale"].AsFloat();
  param.scale.h_scale = m["h_scale"].AsFloat();
  param.scale.w_scale = m["w_scale"].AsFloat();

  // TODO depends on input model framework
  param.center_size_boxes = true;

  loadOperationTo<ir::operation::DetectionPostProcess>(op, subg, param);
}
+
+template <typename LoaderDomain>
 void BaseLoader<LoaderDomain>::loadBatchMatMul(const Operator *op, ir::Graph &subg)
 {
   ir::operation::BatchMatMul::Param param;
@@ -997,7 +1037,8 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
     BroadcastTo,
     FusedBatchNorm,
     StatelessRandomUniform,
-    Erf
+    Erf,
+    DetectionPostProcess
   };
 
   // Mapping from custom op name string to BuiltinOP enum
@@ -1011,6 +1052,7 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
     {"BroadcastTo", BuiltinOP::BroadcastTo},
     {"StatelessRandomUniform", BuiltinOP::StatelessRandomUniform},
     {"Erf", BuiltinOP::Erf},
+    {"TFLite_Detection_PostProcess", BuiltinOP::DetectionPostProcess},
   };
 
   try
@@ -1046,6 +1088,9 @@ void BaseLoader<LoaderDomain>::loadCustom(const Operator *op, ir::Graph &subg)
       case BuiltinOP::Erf:
         loadElementwiseUnary(op, subg, ir::operation::ElementwiseUnary::Type::ERF);
         break;
+      case BuiltinOP::DetectionPostProcess:
+        loadDetectionPostProcess(op, subg);
+        break;
       default:
         throw std::runtime_error{
           "Loader: Custom OP map is defined but operation loader function is not defined"};
index 579d68c..0ffc8fb 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "CircleGen.h"
+#include "flatbuffers/flexbuffers.h"
 
 CircleGen::CircleGen() : _subgraph_contexts(1) // Create primary subgraph
 {
@@ -189,6 +190,35 @@ uint32_t CircleGen::addOperatorDepthwiseConv2D(const OperatorParams &params,
                                 circle::BuiltinOptions_DepthwiseConv2DOptions, options);
 }
 
// Append a TFLite_Detection_PostProcess custom operator to the model under
// construction. Parameters are serialized into a flexbuffer map, mirroring
// what the TFLite converter emits. Returns the operator's index.
uint32_t CircleGen::addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes,
                                                    float y_scale, float x_scale, float h_scale,
                                                    float w_scale, float nms_score_threshold,
                                                    float nms_iou_threshold, int max_detections,
                                                    int max_classes_per_detection,
                                                    int detections_per_class)
{
  // flexbuffer custom_option
  auto flex_buffers = std::make_unique<flexbuffers::Builder>();
  size_t map_start = flex_buffers->StartMap();
  flex_buffers->Int("num_classes", num_classes);
  flex_buffers->Float("y_scale", y_scale);
  flex_buffers->Float("x_scale", x_scale);
  flex_buffers->Float("h_scale", h_scale);
  flex_buffers->Float("w_scale", w_scale);
  flex_buffers->Float("nms_iou_threshold", nms_iou_threshold);
  flex_buffers->Float("nms_score_threshold", nms_score_threshold);
  flex_buffers->Int("max_detections", max_detections);
  flex_buffers->Int("max_classes_per_detection", max_classes_per_detection);
  flex_buffers->Int("detections_per_class", detections_per_class);
  flex_buffers->EndMap(map_start);
  flex_buffers->Finish();

  return addCustomOperatorWithOptions(params, "TFLite_Detection_PostProcess",
                                      circle::BuiltinOptions_NONE, 0, &flex_buffers->GetBuffer(),
                                      circle::CustomOptionsFormat::CustomOptionsFormat_FLEXBUFFERS,
                                      nullptr, nullptr);
}
+
 uint32_t CircleGen::addOperatorElu(const OperatorParams &params)
 {
   return addOperatorWithOptions(params, circle::BuiltinOperator_ELU, circle::BuiltinOptions_NONE,
@@ -523,6 +553,23 @@ uint32_t CircleGen::addOperatorWithOptions(const OperatorParams &params,
   return ind;
 }
 
+uint32_t CircleGen::addCustomOperatorWithOptions(
+  const OperatorParams &params, std::string custom_code, circle::BuiltinOptions options_type,
+  flatbuffers::Offset<void> options, const std::vector<uint8_t> *custom_options,
+  circle::CustomOptionsFormat custom_options_format,
+  const std::vector<uint8_t> *mutating_variable_inputs, const std::vector<int32_t> *intermediates)
+
+{
+  uint32_t opcode_ind = addCustomOperatorCode(custom_code);
+  auto op = circle::CreateOperatorDirect(
+    _fbb, opcode_ind, &params.inputs, &params.outputs, options_type, options, custom_options,
+    custom_options_format, mutating_variable_inputs, intermediates);
+
+  uint32_t ind = curSubgCtx().operators.size();
+  curSubgCtx().operators.emplace_back(op);
+  return ind;
+}
+
 uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
 {
   // TODO If the same OperatorCode is registered already, just return it
@@ -531,6 +578,15 @@ uint32_t CircleGen::addOperatorCode(circle::BuiltinOperator opcode)
   return ind;
 }
 
+uint32_t CircleGen::addCustomOperatorCode(std::string custom_code)
+{
+  // TODO If the same OperatorCode is registered already, just return it
+  uint32_t ind = _opcodes.size();
+  _opcodes.emplace_back(
+    circle::CreateOperatorCodeDirect(_fbb, circle::BuiltinOperator_CUSTOM, custom_code.c_str()));
+  return ind;
+}
+
 flatbuffers::Offset<circle::Buffer> CircleGen::buildBuffer(const uint8_t *buf, size_t size)
 {
   if (buf == nullptr && size == 0)
index ab7707d..f6f7996 100644 (file)
@@ -159,6 +159,11 @@ public:
                                       int stride_w, int stride_h, int depth_multiplier,
                                       circle::ActivationFunctionType actfn, int dilation_w = 1,
                                       int dilation_h = 1);
+  uint32_t addOperatorDetectionPostProcess(const OperatorParams &params, int num_classes,
+                                           float y_scale, float x_scale, float h_scale,
+                                           float w_scale, float nms_score_threshold,
+                                           float nms_iou_threshold, int max_detections,
+                                           int max_classes_per_detection, int detections_per_class);
   uint32_t addOperatorElu(const OperatorParams &params);
   uint32_t addOperatorEqual(const OperatorParams &params);
   uint32_t addOperatorExpandDims(const OperatorParams &params);
@@ -220,7 +225,15 @@ private:
   uint32_t addOperatorWithOptions(const OperatorParams &params, circle::BuiltinOperator opcode,
                                   circle::BuiltinOptions options_type,
                                   flatbuffers::Offset<void> options);
+  uint32_t addCustomOperatorWithOptions(const OperatorParams &params, std::string custom_code,
+                                        circle::BuiltinOptions options_type,
+                                        flatbuffers::Offset<void> options,
+                                        const std::vector<uint8_t> *custom_options,
+                                        circle::CustomOptionsFormat custom_options_format,
+                                        const std::vector<uint8_t> *mutating_variable_inputs,
+                                        const std::vector<int32_t> *intermediates);
   uint32_t addOperatorCode(circle::BuiltinOperator opcode);
+  uint32_t addCustomOperatorCode(std::string custom_code);
   flatbuffers::Offset<circle::Buffer> buildBuffer(const uint8_t *buf, size_t size);
   flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params);
   flatbuffers::Offset<circle::Tensor> buildTensor(const TensorParams &params, float scale,
index 3df7e74..dda0986 100644 (file)
@@ -36,28 +36,6 @@ class ArgMinMaxVariation : public GenModelTest,
 // Reduce axis: 1
 // Output shape: {1, 2, 1}
 // Output type: Int32
-TEST_P(ArgMinMaxVariation, Test)
-{
-  auto &param = GetParam();
-
-  CircleGen cgen;
-  const auto output_type = circle::TensorType::TensorType_INT32;
-  std::vector<int32_t> axis_data{1};
-  uint32_t axis_buf = cgen.addBuffer(axis_data);
-  int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
-  int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
-  int out = cgen.addTensor({{1, 2, 1}, output_type});
-  param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
-                  : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
-  cgen.setInputsAndOutputs({in}, {out});
-
-  _context = std::make_unique<GenModelTestContext>(cgen.finish());
-  _context->addTestCase(param.tcd);
-  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
-  SUCCEED();
-}
-
 // Test with different input type and value
 INSTANTIATE_TEST_CASE_P(
   GenModelTest, ArgMinMaxVariation,
@@ -93,6 +71,28 @@ INSTANTIATE_TEST_CASE_P(
       TestCaseData{}.addInput<int8_t>({1, 4, 2, 3}).addOutput<int32_t>({0, 1}), false,
       circle::TensorType::TensorType_INT8, 1.0, 1}));
 
+TEST_P(ArgMinMaxVariation, Test)
+{
+  auto &param = GetParam();
+
+  CircleGen cgen;
+  const auto output_type = circle::TensorType::TensorType_INT32;
+  std::vector<int32_t> axis_data{1};
+  uint32_t axis_buf = cgen.addBuffer(axis_data);
+  int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
+  int out = cgen.addTensor({{1, 2, 1}, output_type});
+  param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+                  : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(param.tcd);
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
 TEST_F(GenModelTest, OneOp_ArgMax_Int64_AxisToConst)
 {
   CircleGen cgen;
@@ -132,35 +132,41 @@ TEST_F(GenModelTest, OneOp_ArgMax_AxisToVar)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis0)
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis0)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
   const auto output_type = circle::TensorType::TensorType_INT32;
   std::vector<int32_t> axis_data{4};
   uint32_t axis_buf = cgen.addBuffer(axis_data);
   int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
   int out = cgen.addTensor({{1, 2, 1}, output_type});
-  cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+  param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+                  : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
   cgen.setInputsAndOutputs({in}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
-  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
   _context->expectFailCompile();
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
 
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_ArgMax_InvalidAxis1)
+TEST_P(ArgMinMaxVariation, neg_InvalidAxis1)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
   const auto output_type = circle::TensorType::TensorType_INT32;
   std::vector<int32_t> axis_data{-3};
   uint32_t axis_buf = cgen.addBuffer(axis_data);
   int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
-  int in = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{2, 2}, param.input_type}, param.scale, param.zero_point);
   int out = cgen.addTensor({{2}, output_type});
-  cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+  param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+                  : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
   cgen.setInputsAndOutputs({in}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
@@ -188,16 +194,19 @@ TEST_F(GenModelTest, neg_OneOp_ArgMax_InType)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_ArgMax_AxisType)
+TEST_P(ArgMinMaxVariation, neg_AxisType)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  const auto output_type = circle::TensorType::TensorType_FLOAT32;
+  const auto output_type = circle::TensorType::TensorType_INT32;
   std::vector<float> axis_data{4};
   uint32_t axis_buf = cgen.addBuffer(axis_data);
   int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32, axis_buf});
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
   int out = cgen.addTensor({{1, 2, 1}, output_type});
-  cgen.addOperatorArgMax({{in, axis}, {out}}, output_type);
+  param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_type)
+                  : cgen.addOperatorArgMin({{in, axis}, {out}}, output_type);
   cgen.setInputsAndOutputs({in}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
@@ -224,16 +233,20 @@ TEST_F(GenModelTest, neg_OneOp_ArgMax_OutType)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_ArgMax_paramType)
+TEST_P(ArgMinMaxVariation, neg_paramType)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
   const auto output_type = circle::TensorType::TensorType_INT32;
+  const auto output_param = circle::TensorType::TensorType_INT64;
   std::vector<int32_t> axis_data{4};
   uint32_t axis_buf = cgen.addBuffer(axis_data);
   int axis = cgen.addTensor({{1}, circle::TensorType::TensorType_INT32, axis_buf});
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.input_type}, param.scale, param.zero_point);
   int out = cgen.addTensor({{1, 2, 1}, output_type});
-  cgen.addOperatorArgMax({{in, axis}, {out}}, circle::TensorType::TensorType_INT64);
+  param.is_argmax ? cgen.addOperatorArgMax({{in, axis}, {out}}, output_param)
+                  : cgen.addOperatorArgMin({{in, axis}, {out}}, output_param);
   cgen.setInputsAndOutputs({in}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
index 2fb1d68..15ddac2 100644 (file)
@@ -42,27 +42,6 @@ class AveragePool2DVariation : public GenModelTest,
 {
 };
 
-TEST_P(AveragePool2DVariation, Test)
-{
-  auto &param = GetParam();
-  CircleGen cgen;
-
-  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
-                          param.type.zero_point);
-  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
-                           param.type.zero_point);
-  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
-                                param.param.stride_h, param.param.filter_w, param.param.filter_h,
-                                circle::ActivationFunctionType_NONE);
-  cgen.setInputsAndOutputs({in}, {out});
-
-  _context = std::make_unique<GenModelTestContext>(cgen.finish());
-  _context->addTestCase(param.tcd);
-  _context->setBackends(param.backend);
-
-  SUCCEED();
-}
-
 // Test with different input type and value
 INSTANTIATE_TEST_CASE_P(
   GenModelTest, AveragePool2DVariation,
@@ -108,6 +87,27 @@ INSTANTIATE_TEST_CASE_P(
       {circle::TensorType::TensorType_INT8, 2.0, -1},
       {"cpu"}}));
 
+TEST_P(AveragePool2DVariation, Test)
+{
+  auto &param = GetParam();
+  CircleGen cgen;
+
+  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+                          param.type.zero_point);
+  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+                           param.type.zero_point);
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+                                param.param.stride_h, param.param.filter_w, param.param.filter_h,
+                                circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in}, {out});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(param.tcd);
+  _context->setBackends(param.backend);
+
+  SUCCEED();
+}
+
 TEST_F(GenModelTest, neg_OneOp_AvgPool2D_3DInput)
 {
   // 3D Tensors are not supported
@@ -142,13 +142,18 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_2DInput)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidPaddingType)
+TEST_P(AveragePool2DVariation, neg_InvalidPaddingType)
 {
+  auto &param = GetParam();
   CircleGen cgen;
-  int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99), 2, 2, 2, 2,
-                                circle::ActivationFunctionType_NONE);
+
+  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+                          param.type.zero_point);
+  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+                           param.type.zero_point);
+  cgen.addOperatorAveragePool2D({{in}, {out}}, static_cast<circle::Padding>(99),
+                                param.param.stride_w, param.param.stride_h, param.param.filter_w,
+                                param.param.filter_h, circle::ActivationFunctionType_NONE);
   cgen.setInputsAndOutputs({in}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
@@ -157,12 +162,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidPaddingType)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_1)
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_1)
 {
+  auto &param = GetParam();
   CircleGen cgen;
-  int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, -1, 2,
+
+  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+                          param.type.zero_point);
+  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+                           param.type.zero_point);
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+                                param.param.stride_h, -1, param.param.filter_h,
                                 circle::ActivationFunctionType_NONE);
   cgen.setInputsAndOutputs({in}, {out});
 
@@ -172,12 +182,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_1)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_2)
+TEST_P(AveragePool2DVariation, neg_InvalidFilterSize_2)
 {
+  auto &param = GetParam();
   CircleGen cgen;
-  int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 2, 2, 2, 0,
+
+  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+                          param.type.zero_point);
+  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+                           param.type.zero_point);
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w,
+                                param.param.stride_h, param.param.filter_w, 0,
                                 circle::ActivationFunctionType_NONE);
   cgen.setInputsAndOutputs({in}, {out});
 
@@ -187,12 +202,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidFilterSize_2)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_1)
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_1)
 {
+  auto &param = GetParam();
   CircleGen cgen;
-  int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, 2, 2, 2,
+
+  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+                          param.type.zero_point);
+  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+                           param.type.zero_point);
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 0, param.param.stride_h,
+                                param.param.filter_w, param.param.filter_h,
                                 circle::ActivationFunctionType_NONE);
   cgen.setInputsAndOutputs({in}, {out});
 
@@ -202,12 +222,17 @@ TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_1)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_AvgPool2D_InvalidStrides_2)
+TEST_P(AveragePool2DVariation, neg_InvalidStrides_2)
 {
+  auto &param = GetParam();
   CircleGen cgen;
-  int in = cgen.addTensor({{2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, 1, -100, 2, 2,
+
+  int in = cgen.addTensor({param.input_shape, param.type.data_type}, param.type.scale,
+                          param.type.zero_point);
+  int out = cgen.addTensor({param.output_shape, param.type.data_type}, param.type.scale,
+                           param.type.zero_point);
+  cgen.addOperatorAveragePool2D({{in}, {out}}, circle::Padding_SAME, param.param.stride_w, -100,
+                                param.param.filter_w, param.param.filter_h,
                                 circle::ActivationFunctionType_NONE);
   cgen.setInputsAndOutputs({in}, {out});
 
index 6e24359..f4397ba 100644 (file)
@@ -59,25 +59,6 @@ class ConcatVariation : public GenModelTest,
 
 // Input shape: {2, 3} / {2, 3}
 // Output shape: {4, 3}
-TEST_P(ConcatVariation, Test)
-{
-  auto &param = GetParam();
-
-  CircleGen cgen;
-  int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
-  int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
-  int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
-  cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
-                                circle::ActivationFunctionType_NONE);
-  cgen.setInputsAndOutputs({input1, input2}, {output});
-
-  _context = std::make_unique<GenModelTestContext>(cgen.finish());
-  _context->addTestCase(param.tcd);
-  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
-  SUCCEED();
-}
-
 INSTANTIATE_TEST_CASE_P(
   GenModelTest, ConcatVariation,
   ::testing::Values(
@@ -107,6 +88,25 @@ INSTANTIATE_TEST_CASE_P(
                                              {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}),
                          circle::TensorType::TensorType_INT64}));
 
+TEST_P(ConcatVariation, Test)
+{
+  auto &param = GetParam();
+
+  CircleGen cgen;
+  int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+  int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+  int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
+  cgen.addOperatorConcatenation({{input1, input2}, {output}}, 0,
+                                circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({input1, input2}, {output});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(param.tcd);
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
 TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
 {
   CircleGen cgen;
@@ -180,13 +180,14 @@ TEST_F(GenModelTest, OneOp_Concat_Subtensor_4D)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Concat_InvalidAxis)
+TEST_P(ConcatVariation, neg_InvalidAxis)
 {
-  CircleGen cgen;
+  auto &param = GetParam();
 
-  int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
-  int input2 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
-  int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32});
+  CircleGen cgen;
+  int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+  int input2 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+  int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
   int axis = 2;
 
   cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
@@ -200,13 +201,14 @@ TEST_F(GenModelTest, neg_OneOp_Concat_InvalidAxis)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Concat_InvalidRank)
+TEST_P(ConcatVariation, neg_InvalidRank)
 {
-  CircleGen cgen;
+  auto &param = GetParam();
 
-  int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
-  int input2 = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
-  int output = cgen.addTensor({{1, 2, 3}, circle::TensorType::TensorType_FLOAT32});
+  CircleGen cgen;
+  int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+  int input2 = cgen.addTensor({{1, 2, 3}, param.type}, param.scale, param.zero_point);
+  int output = cgen.addTensor({{1, 4, 3}, param.type}, param.scale, param.zero_point);
   int axis = 0;
 
   cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
@@ -220,13 +222,14 @@ TEST_F(GenModelTest, neg_OneOp_Concat_InvalidRank)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Concat_InvalidDimension)
+TEST_P(ConcatVariation, neg_InvalidDimension)
 {
-  CircleGen cgen;
+  auto &param = GetParam();
 
-  int input1 = cgen.addTensor({{2, 3}, circle::TensorType::TensorType_FLOAT32});
-  int input2 = cgen.addTensor({{3, 2}, circle::TensorType::TensorType_FLOAT32});
-  int output = cgen.addTensor({{4, 3}, circle::TensorType::TensorType_FLOAT32});
+  CircleGen cgen;
+  int input1 = cgen.addTensor({{2, 3}, param.type}, param.scale, param.zero_point);
+  int input2 = cgen.addTensor({{3, 2}, param.type}, param.scale, param.zero_point);
+  int output = cgen.addTensor({{4, 3}, param.type}, param.scale, param.zero_point);
   int axis = 0;
 
   cgen.addOperatorConcatenation({{input1, input2}, {output}}, axis,
index 9f56340..a4fe884 100644 (file)
@@ -29,6 +29,9 @@ class DepthToSpaceVariation : public GenModelTest,
 {
 };
 
+// Input shape: {1, 1, 2, 4}
+// Block size: 2
+// Output shape: {1, 2, 4, 1}
 INSTANTIATE_TEST_CASE_P(
   GenModelTest, DepthToSpaceVariation,
   ::testing::Values(
@@ -52,9 +55,6 @@ INSTANTIATE_TEST_CASE_P(
       uniformTCD<int8_t>({{1, 2, 3, 4, 5, 6, 7, 8}}, {{1, 2, 5, 6, 3, 4, 7, 8}}),
       circle::TensorType::TensorType_INT8, 1.0f, -2}));
 
-// Input shape: {1, 1, 2, 4}
-// Block size: 2
-// Output shape: {1, 2, 4, 1}
 TEST_P(DepthToSpaceVariation, Test)
 {
   auto &param = GetParam();
@@ -72,12 +72,13 @@ TEST_P(DepthToSpaceVariation, Test)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_DepthToSpace_Blocksize)
+TEST_P(DepthToSpaceVariation, neg_Blocksize)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  circle::TensorType data_type = circle::TensorType::TensorType_FLOAT32;
-  int in = cgen.addTensor({{1, 1, 2, 4}, data_type});
-  int out = cgen.addTensor({{1, 2, 4, 1}, data_type});
+  int in = cgen.addTensor({{1, 1, 2, 4}, param.type}, param.scale, param.zero_point);
+  int out = cgen.addTensor({{1, 2, 4, 1}, param.type}, param.scale, param.zero_point);
   cgen.addOperatorDepthToSpace({{in}, {out}}, -2);
   cgen.setInputsAndOutputs({in}, {out});
 
index 658c44c..a0bdbf9 100644 (file)
@@ -257,50 +257,6 @@ class DepthwiseConv2DQuantTest
 using DepthwiseConv2DQuantTestParamU8 = DepthwiseConv2DQuantTestParam<uint8_t>;
 using DepthwiseConv2DQuantTestU8 = DepthwiseConv2DQuantTest<uint8_t>;
 
-CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
-{
-  assert(1 <= stride && stride <= 2);
-  assert(1 <= input_depth && input_depth <= 16);
-  assert(1 <= depth_multiplier && depth_multiplier <= 32);
-
-  const int output_depth = input_depth * depth_multiplier;
-  assert(1 <= output_depth && output_depth <= 32);
-
-  CircleGen cgen;
-  uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
-    2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
-    2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
-  uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
-  int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
-  int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
-  int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
-  int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
-  cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
-                                  stride, depth_multiplier, circle::ActivationFunctionType_NONE);
-  cgen.setInputsAndOutputs({in}, {out});
-  return cgen.finish();
-}
-
-TEST_P(DepthwiseConv2DQuantTestU8, Test)
-{
-  // Same input is used for all tests but output differs
-  static const std::vector<uint8_t> input64{
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
-    2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
-
-  auto &param = GetParam();
-  _context = std::make_unique<GenModelTestContext>(
-    genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
-  std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
-  _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
-  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
-
-  SUCCEED();
-}
-
 // Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
 // kernels.
 INSTANTIATE_TEST_CASE_P(
@@ -337,10 +293,7 @@ INSTANTIATE_TEST_CASE_P(
     DepthwiseConv2DQuantTestParamU8{
       2, 16, 1, std::vector<uint8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
 
-using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
-using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
-
-CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+CircleBuffer genDepthwiseConv2DQuantU8Model(int stride, int input_depth, int depth_multiplier)
 {
   assert(1 <= stride && stride <= 2);
   assert(1 <= input_depth && input_depth <= 16);
@@ -350,40 +303,43 @@ CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int dep
   assert(1 <= output_depth && output_depth <= 32);
 
   CircleGen cgen;
-  uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+  uint32_t ker_buf = cgen.addBuffer(std::vector<uint8_t>{
     0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
     2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
     0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
     2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
     0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
   uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
-  int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
-  int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+  int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_UINT8}, 0.5, 0);
+  int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_UINT8, ker_buf}, 0.5, 0);
   int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
-  int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+  int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_UINT8}, 1, 0);
   cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
                                   stride, depth_multiplier, circle::ActivationFunctionType_NONE);
   cgen.setInputsAndOutputs({in}, {out});
   return cgen.finish();
 }
 
-TEST_P(DepthwiseConv2DQuantTestI8, Test)
+TEST_P(DepthwiseConv2DQuantTestU8, Test)
 {
   // Same input is used for all tests but output differs
-  static const std::vector<int8_t> input64{
+  static const std::vector<uint8_t> input64{
     0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
     2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
 
   auto &param = GetParam();
   _context = std::make_unique<GenModelTestContext>(
-    genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
-  std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
-  _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+    genDepthwiseConv2DQuantU8Model(param.stride, param.input_depth, param.depth_multiplier));
+  std::vector<uint8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+  _context->addTestCase(uniformTCD<uint8_t>({ref_input}, {param.ref_output}));
   _context->setBackends({"acl_cl", "acl_neon", "cpu"});
 
   SUCCEED();
 }
 
+using DepthwiseConv2DQuantTestParamI8 = DepthwiseConv2DQuantTestParam<int8_t>;
+using DepthwiseConv2DQuantTestI8 = DepthwiseConv2DQuantTest<int8_t>;
+
 // Test with different InputDepth and DepthMultiplier. The values are intended to test optimized CPU
 // kernels.
 INSTANTIATE_TEST_CASE_P(
@@ -420,6 +376,50 @@ INSTANTIATE_TEST_CASE_P(
     DepthwiseConv2DQuantTestParamI8{
       2, 16, 1, std::vector<int8_t>{0, 3, 8, 16, 0, 4, 7, 12, 0, 3, 7, 13, 0, 4, 7, 12}}));
 
+CircleBuffer genDepthwiseConv2DQuantI8Model(int stride, int input_depth, int depth_multiplier)
+{
+  assert(1 <= stride && stride <= 2);
+  assert(1 <= input_depth && input_depth <= 16);
+  assert(1 <= depth_multiplier && depth_multiplier <= 32);
+
+  const int output_depth = input_depth * depth_multiplier;
+  assert(1 <= output_depth && output_depth <= 32);
+
+  CircleGen cgen;
+  uint32_t ker_buf = cgen.addBuffer(std::vector<int8_t>{
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+    2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1,
+    2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3});
+  uint32_t bias_buf = cgen.addBuffer(std::vector<int32_t>(output_depth, 0));
+  int in = cgen.addTensor({{1, 2, 2, input_depth}, circle::TensorType_INT8}, 0.5, 0);
+  int ker = cgen.addTensor({{1, 2, 2, output_depth}, circle::TensorType_INT8, ker_buf}, 0.5, 0);
+  int bias = cgen.addTensor({{output_depth}, circle::TensorType_INT32, bias_buf}, 0.25, 0);
+  int out = cgen.addTensor({{1, 1, 1, output_depth}, circle::TensorType_INT8}, 1, 0);
+  cgen.addOperatorDepthwiseConv2D({{in, ker, bias}, {out}}, circle::Padding::Padding_VALID, stride,
+                                  stride, depth_multiplier, circle::ActivationFunctionType_NONE);
+  cgen.setInputsAndOutputs({in}, {out});
+  return cgen.finish();
+}
+
+TEST_P(DepthwiseConv2DQuantTestI8, Test)
+{
+  // Same input is used for all tests but output differs
+  static const std::vector<int8_t> input64{
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2, 5, 4, 3, 2,
+    2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 4, 6, 8, 2, 3, 5, 8, 8, 5, 3, 2, 1, 2, 3, 4, 5, 4, 3, 2};
+
+  auto &param = GetParam();
+  _context = std::make_unique<GenModelTestContext>(
+    genDepthwiseConv2DQuantI8Model(param.stride, param.input_depth, param.depth_multiplier));
+  std::vector<int8_t> ref_input(input64.begin(), input64.begin() + param.input_depth * 4);
+  _context->addTestCase(uniformTCD<int8_t>({ref_input}, {param.ref_output}));
+  _context->setBackends({"acl_cl", "acl_neon", "cpu"});
+
+  SUCCEED();
+}
+
 TEST_F(GenModelTest, neg_OneOp_DepthwiseConv2D_InvalidPaddingType)
 {
   _context = std::make_unique<GenModelTestContext>(genNegTestDepthwiseConv2DModel(
diff --git a/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc b/tests/nnfw_api/src/one_op_tests/DetectionPostProcess.cc
new file mode 100644 (file)
index 0000000..188638b
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "GenModelTest.h"
+
+#include <memory>
+
+TEST_F(GenModelTest, OneOp_DetectionPostProcess_SingleBox)
+{
+  CircleGen cgen;
+
+  int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+  int scores = cgen.addTensor({{1, 1, 2}, circle::TensorType::TensorType_FLOAT32});
+  int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+  int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+  int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+  int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+  int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorDetectionPostProcess(
+    {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 1, 10, 10, 5, 5,
+    0.8, 0.5, 1, 1, 1);
+  cgen.setInputsAndOutputs({boxes, scores, anchors},
+                           {box_coors, box_classes, box_scores, num_selected});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.9}, {0, 0, 1, 1}},
+                                          {{-0.5, -0.5, 0.5, 0.5}, {0}, {0.9}, {1}}));
+  _context->setBackends({"cpu"});
+
+  SUCCEED();
+}
+
+TEST_F(GenModelTest, neg_OneOp_DetectionPostProcess_SinglBox_MultiClasses)
+{
+  CircleGen cgen;
+
+  int boxes = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+  int scores = cgen.addTensor({{1, 1, 3}, circle::TensorType::TensorType_FLOAT32});
+  int anchors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+
+  int box_coors = cgen.addTensor({{1, 1, 4}, circle::TensorType::TensorType_FLOAT32});
+  int box_classes = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+  int box_scores = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+  int num_selected = cgen.addTensor({{1}, circle::TensorType::TensorType_FLOAT32});
+
+  cgen.addOperatorDetectionPostProcess(
+    {{boxes, scores, anchors}, {box_coors, box_classes, box_scores, num_selected}}, 2, 10, 10, 5, 5,
+    0.8, 0.5, 1, 1, 1);
+  cgen.setInputsAndOutputs({boxes, scores, anchors},
+                           {box_coors, box_classes, box_scores, num_selected});
+
+  _context = std::make_unique<GenModelTestContext>(cgen.finish());
+  _context->addTestCase(uniformTCD<float>({{0, 0, 0, 0}, {0, 0.7, 0.9}, {0, 0, 1, 1}},
+                                          {{-0.5, -0.5, 0.5, 0.5}, {1}, {0.9}, {1}}));
+  _context->setBackends({"cpu"});
+  _context->expectFailModelLoad();
+
+  SUCCEED();
+}
index 42971da..c376c1c 100644 (file)
@@ -31,6 +31,21 @@ class PadVariation : public GenModelTest, public ::testing::WithParamInterface<P
 {
 };
 
+// Test with different value type
+INSTANTIATE_TEST_CASE_P(
+  GenModelTest, PadVariation,
+  ::testing::Values(
+    // float value
+    PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
+    // uint8 value
+    PadParam{
+      uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
+      circle::TensorType::TensorType_UINT8, 1.0, 8},
+    // int8 value
+    PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
+                                {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
+             circle::TensorType::TensorType_INT8, 1.0, -5}));
+
 TEST_P(PadVariation, Test)
 {
   auto &param = GetParam();
@@ -51,29 +66,16 @@ TEST_P(PadVariation, Test)
   SUCCEED();
 }
 
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
-  GenModelTest, PadVariation,
-  ::testing::Values(
-    // float value
-    PadParam{uniformTCD<float>({{1, 2, 3, 4}}, {{0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}})},
-    // uint8 value
-    PadParam{
-      uniformTCD<uint8_t>({{1, 2, 3, 4}}, {{8, 8, 8, 8, 8, 1, 2, 8, 8, 3, 4, 8, 8, 8, 8, 8}}),
-      circle::TensorType::TensorType_UINT8, 1.0, 8},
-    // int8 value
-    PadParam{uniformTCD<int8_t>({{-2, -1, 1, 2}},
-                                {{-5, -5, -5, -5, -5, -2, -1, -5, -5, 1, 2, -5, -5, -5, -5, -5}}),
-             circle::TensorType::TensorType_INT8, 1.0, -5}));
-
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
+TEST_P(PadVariation, neg_InvalidPadRank)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
   std::vector<int32_t> padding_data{1, 1, 1, 1};
   uint32_t padding_buf = cgen.addBuffer(padding_data);
   int padding = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, padding_buf});
-  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
 
   cgen.addOperatorPad({{in, padding}, {out}});
   cgen.setInputsAndOutputs({in}, {out});
@@ -85,14 +87,16 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadRank)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
+TEST_P(PadVariation, neg_InvalidPadDim0)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
   std::vector<int32_t> padding_data{1, 1, 1, 1};
   uint32_t padding_buf = cgen.addBuffer(padding_data);
   int padding = cgen.addTensor({{2, 2}, circle::TensorType::TensorType_INT32, padding_buf});
-  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
 
   cgen.addOperatorPad({{in, padding}, {out}});
   cgen.setInputsAndOutputs({in}, {out});
@@ -104,14 +108,16 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim0)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
+TEST_P(PadVariation, neg_InvalidPadDim1)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  int in = cgen.addTensor({{1, 1, 1, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
   std::vector<int32_t> padding_data{1, 1, 1, 1};
   uint32_t padding_buf = cgen.addBuffer(padding_data);
   int padding = cgen.addTensor({{4, 1}, circle::TensorType::TensorType_INT32, padding_buf});
-  int out = cgen.addTensor({{2, 2, 2, 2}, circle::TensorType::TensorType_FLOAT32});
+  int out = cgen.addTensor({{1, 4, 4, 1}, param.data_type}, param.scale, param.zero_point);
 
   cgen.addOperatorPad({{in, padding}, {out}});
   cgen.setInputsAndOutputs({in}, {out});
@@ -123,14 +129,20 @@ TEST_F(GenModelTest, neg_OneOp_Pad_InvalidPadDim1)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Pad_Type)
+TEST_P(PadVariation, neg_Type)
 {
+  auto &param = GetParam();
+
+  const circle::TensorType output_type = ((param.data_type == circle::TensorType::TensorType_UINT8)
+                                            ? circle::TensorType::TensorType_INT8
+                                            : circle::TensorType::TensorType_UINT8);
+
   CircleGen cgen;
-  int in = cgen.addTensor({{1, 2, 2, 1}, circle::TensorType::TensorType_FLOAT32});
+  int in = cgen.addTensor({{1, 2, 2, 1}, param.data_type}, param.scale, param.zero_point);
   std::vector<int32_t> padding_data{0, 0, 1, 1, 1, 1, 0, 0};
   uint32_t padding_buf = cgen.addBuffer(padding_data);
   int padding = cgen.addTensor({{4, 2}, circle::TensorType::TensorType_INT32, padding_buf});
-  int out = cgen.addTensor({{1, 4, 4, 1}, circle::TensorType::TensorType_UINT8}, 1.0, 1);
+  int out = cgen.addTensor({{1, 4, 4, 1}, output_type}, 1.0, 0);
 
   cgen.addOperatorPad({{in, padding}, {out}});
   cgen.setInputsAndOutputs({in}, {out});
index 960cd88..002fb01 100644 (file)
@@ -34,6 +34,32 @@ class SliceVariation : public GenModelTest,
 {
 };
 
+INSTANTIATE_TEST_CASE_P(
+  GenModelTest, SliceVariation,
+  ::testing::Values(
+    SliceVariationParam{
+      {2, 2, 3, 1},
+      {0, 1, 1, 0},
+      {1, 1, 2, 1},
+      uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
+    SliceVariationParam{
+      {2, 2, 3, 1},
+      {0, 1, 1, 0},
+      {1, 1, 2, 1},
+      uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+      circle::TensorType::TensorType_UINT8,
+      1,
+      0},
+    SliceVariationParam{
+      {2, 2, 3, 1},
+      {0, 1, 1, 0},
+      {1, 1, 2, 1},
+      uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
+      circle::TensorType::TensorType_FLOAT32,
+      0,
+      0,
+      circle::TensorType::TensorType_INT64}));
+
 TEST_P(SliceVariation, Test)
 {
   auto &param = GetParam();
@@ -90,32 +116,6 @@ TEST_P(SliceVariation, Test)
   SUCCEED();
 }
 
-INSTANTIATE_TEST_CASE_P(
-  GenModelTest, SliceVariation,
-  ::testing::Values(
-    SliceVariationParam{
-      {2, 2, 3, 1},
-      {0, 1, 1, 0},
-      {1, 1, 2, 1},
-      uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}})},
-    SliceVariationParam{
-      {2, 2, 3, 1},
-      {0, 1, 1, 0},
-      {1, 1, 2, 1},
-      uniformTCD<uint8_t>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
-      circle::TensorType::TensorType_UINT8,
-      1,
-      0},
-    SliceVariationParam{
-      {2, 2, 3, 1},
-      {0, 1, 1, 0},
-      {1, 1, 2, 1},
-      uniformTCD<float>({{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}}, {{12, 13}}),
-      circle::TensorType::TensorType_FLOAT32,
-      0,
-      0,
-      circle::TensorType::TensorType_INT64}));
-
 TEST_F(GenModelTest, neg_OneOp_Slice_Type)
 {
   CircleGen cgen;
@@ -136,18 +136,48 @@ TEST_F(GenModelTest, neg_OneOp_Slice_Type)
   SUCCEED();
 }
 
-TEST_F(GenModelTest, neg_OneOp_Slice_DiffType)
+TEST_P(SliceVariation, neg_DiffType)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  int in = cgen.addTensor({{1, 3, 3, 2}, circle::TensorType::TensorType_FLOAT32});
-  std::vector<int32_t> begins_data = {0, 0, 1, 0};
-  uint32_t begins_buf = cgen.addBuffer(begins_data);
-  int begins = cgen.addTensor({{4}, circle::TensorType::TensorType_INT32, begins_buf});
-  std::vector<int64_t> sizes_data = {1, 2, 1, 1};
-  uint32_t sizes_buf = cgen.addBuffer(sizes_data);
-  int sizes = cgen.addTensor({{4}, circle::TensorType::TensorType_INT64, sizes_buf});
-  int out = cgen.addTensor({{1, 2, 1, 1}, circle::TensorType::TensorType_FLOAT32});
-  cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+
+  int in = cgen.addTensor({param.input_shape, param.input_type}, param.scale, param.zero_point);
+  int out = cgen.addTensor({param.sizes, param.input_type}, param.scale, param.zero_point);
+  if (param.begins_type == circle::TensorType::TensorType_INT32)
+  {
+    uint32_t begins_buf = cgen.addBuffer(param.begins);
+    std::vector<int64_t> sizes_64(param.sizes.size());
+    for (int i = 0; i < param.begins.size(); i++)
+    {
+      sizes_64[i] = param.sizes[i];
+    }
+
+    int rank = param.begins.size();
+    int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+    uint32_t sizes_buf = cgen.addBuffer(sizes_64);
+    int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT64, sizes_buf});
+
+    cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+  }
+  else if (param.begins_type == circle::TensorType::TensorType_INT64)
+  {
+    std::vector<int64_t> begins_64(param.begins.size());
+    for (int i = 0; i < param.begins.size(); i++)
+    {
+      begins_64[i] = param.begins[i];
+    }
+
+    uint32_t begins_buf = cgen.addBuffer(begins_64);
+    int rank = param.begins.size();
+    int begins = cgen.addTensor({{rank}, param.begins_type, begins_buf});
+
+    uint32_t sizes_buf = cgen.addBuffer(param.sizes);
+    int sizes = cgen.addTensor({{rank}, circle::TensorType::TensorType_INT32, sizes_buf});
+
+    cgen.addOperatorSlice({{in, begins, sizes}, {out}});
+  }
   cgen.setInputsAndOutputs({in}, {out});
 
   _context = std::make_unique<GenModelTestContext>(cgen.finish());
index 95debec..aba4e89 100644 (file)
@@ -30,6 +30,23 @@ class SoftmaxVariation : public GenModelTest, public ::testing::WithParamInterfa
 {
 };
 
+// Test with different value type
+INSTANTIATE_TEST_CASE_P(
+  GenModelTest, SoftmaxVariation,
+  ::testing::Values(
+    // float value
+    SoftmaxParam{
+      uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
+                        {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
+    // uint8 value
+    SoftmaxParam{
+      uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
+      circle::TensorType::TensorType_UINT8, 1.0, 10},
+    // int8 value
+    SoftmaxParam{
+      uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
+      circle::TensorType::TensorType_INT8, 1.0, 0}));
+
 TEST_P(SoftmaxVariation, Test)
 {
   auto &param = GetParam();
@@ -95,28 +112,14 @@ TEST_F(GenModelTest, OneOp_Softmax)
   SUCCEED();
 }
 
-// Test with different value type
-INSTANTIATE_TEST_CASE_P(
-  GenModelTest, SoftmaxVariation,
-  ::testing::Values(
-    // float value
-    SoftmaxParam{
-      uniformTCD<float>({{0, -6, 2, 4, 3, -2, 10, 1}},
-                        {{.23463, .12877, .28658, .35003, .22528, .13664, .45365, .18443}})},
-    // uint8 value
-    SoftmaxParam{
-      uniformTCD<uint8_t>({{10, 4, 12, 14, 13, 8, 20, 11}}, {{60, 33, 73, 90, 58, 35, 116, 47}}),
-      circle::TensorType::TensorType_UINT8, 1.0, 10},
-    // int8 value
-    SoftmaxParam{
-      uniformTCD<int8_t>({{0, -6, 2, 4, 3, -2, 10, 1}}, {{-68, -95, -55, -38, -70, -93, -12, -81}}),
-      circle::TensorType::TensorType_INT8, 1.0, 0}));
-
-TEST_F(GenModelTest, neg_OneOp_Softmax_Type)
+TEST_P(SoftmaxVariation, neg_Type)
 {
+  auto &param = GetParam();
+
   CircleGen cgen;
-  int input = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_FLOAT32});
-  int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_INT8}, 1.0, 0);
+  int input =
+    cgen.addTensor({{1, 2, 1, 4}, param.data_type}, param.input_scale, param.input_zero_point);
+  int out = cgen.addTensor({{1, 2, 1, 4}, circle::TensorType::TensorType_BOOL});
   cgen.addOperatorSoftmax({{input}, {out}}, 0.1);
   cgen.setInputsAndOutputs({input}, {out});
 
index 5c875e3..374a58a 100755 (executable)
@@ -27,7 +27,7 @@ show_version() {
   current_version=${version_line#"Version:"}
 
   if [ $nightly -eq 0 ]; then
-    echo $current_version~$(date "+%y%m%d%H")
+    echo $current_version~$(date -u "+%y%m%d%H")
   else
     echo $current_version
   fi